063898cae3e5c9e5a747bbdb7b1450f5680f6977
[releases.git] / tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
18  *                                      a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
/* CONFIG_TCP_MD5SIG disabled: stub that never finds a key, letting
 * callers stay unconditional instead of sprinkling #ifdefs.
 */
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
88 #endif
89
90 /* Helper returning the inet6 address from a given tcp socket.
91  * It can be used in TCP stack instead of inet6_sk(sk).
92  * This avoids a dereference and allow compiler optimizations.
93  * It is a specialized version of inet6_sk_generic().
94  */
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
96 {
97         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
98
99         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
100 }
101
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
103 {
104         struct dst_entry *dst = skb_dst(skb);
105
106         if (dst && dst_hold_safe(dst)) {
107                 const struct rt6_info *rt = (const struct rt6_info *)dst;
108
109                 sk->sk_rx_dst = dst;
110                 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
111                 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
112         }
113 }
114
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
116 {
117         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118                                 ipv6_hdr(skb)->saddr.s6_addr32,
119                                 tcp_hdr(skb)->dest,
120                                 tcp_hdr(skb)->source);
121 }
122
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
124 {
125         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126                                    ipv6_hdr(skb)->saddr.s6_addr32);
127 }
128
/* Run the cgroup-BPF INET6_CONNECT hook before tcp_v6_connect() proper.
 * Caller must hold the socket lock (asserted via sock_owned_by_me()).
 */
static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent BPF program called below from accessing bytes that are out
	 * of the bound specified by user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
143
/* active-open entry point: validate the destination, resolve a route,
 * pick the source address, and kick off the SYN.  Handles the v4-mapped
 * case by delegating to tcp_v4_connect() after swapping in the mapped
 * af_ops.  Returns 0 or a negative errno; on failure the socket is left
 * in TCP_CLOSE with dport cleared.
 */
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	/* IPV6_FLOWINFO_SEND enabled: take the flow label from the caller,
	 * validating that any non-zero label actually exists for this sock.
	 */
	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	/* TCP has no multicast */
	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	/* Reconnecting to a different peer: stale PAWS state and the old
	 * write_seq must not leak into the new connection.
	 */
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		/* Save ext_hdr_len so it can be restored if the v4 connect
		 * fails and we fall back to the v6 ops below.
		 */
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		/* Switch the socket to the v4-mapped operations before
		 * delegating to tcp_v4_connect().
		 */
		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			/* Roll back to native IPv6 ops on failure. */
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	/* Build the flow description used for routing. */
	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	/* No explicit source bound: adopt the one route lookup picked. */
	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	/* Account extension headers in the advertised MSS budget. */
	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	/* Move to SYN_SENT before hashing so incoming packets can match. */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	/* Skip ISN/tsoffset generation for repaired (checkpointed) socks. */
	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	/* TCP Fast Open may defer the actual SYN until sendmsg(). */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
342
/* React to a deferred or direct PMTU reduction: shrink the MSS and
 * retransmit outstanding data that no longer fits.  mtu_info is written
 * by tcp_v6_err() (possibly from another context), hence READ_ONCE.
 */
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	/* No path MTU to maintain in LISTEN/CLOSE states. */
	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Only act if the route's MTU is now below our cached pmtu. */
	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
368
/* ICMPv6 error handler for TCP.  Looks up the socket the quoted segment
 * belongs to and dispatches on the error type: redirects update the
 * route, PKT_TOOBIG triggers PMTU reduction (deferred if the socket is
 * owned by user context), and fatal errors tear the connection down.
 * Returns 0 when a socket was found, -ENOENT otherwise.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	/* The quoted header carries our original addresses/ports; note the
	 * daddr/saddr swap relative to a received data packet.
	 */
	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	/* TIME_WAIT sockets ignore ICMP errors entirely. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	/* A request socket: handled without taking the listener lock. */
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	/* PKT_TOOBIG can be deferred via tsq flags, so only count other
	 * ICMPs as dropped when user context owns the socket.
	 */
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* IPV6_MINHOPCOUNT protection (RFC 5082-style TTL check). */
	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Reject ICMPs quoting a sequence outside our send window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		/* Ignore bogus MTUs below the IPv6 minimum. */
		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Apply now if we own the socket, otherwise defer to the
		 * tasklet via TCP_MTU_REDUCED_DEFERRED (holding a ref).
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* Established states: surface the error only with IPV6_RECVERR. */
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
497
498
/* Build and transmit a SYN-ACK for @req.  If @dst is NULL a route is
 * resolved first.  Returns a net_xmit_eval()'d error code, or -ENOMEM
 * when allocation of the route or skb fails.
 */
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		/* IPV6_FLOWLABEL_MGR reflection: echo the peer's label. */
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		/* np->opt is RCU-protected; the request's own ipv6_opt,
		 * if set, takes precedence.
		 */
		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}
540
541
542 static void tcp_v6_reqsk_destructor(struct request_sock *req)
543 {
544         kfree(inet_rsk(req)->ipv6_opt);
545         kfree_skb(inet_rsk(req)->pktopts);
546 }
547
548 #ifdef CONFIG_TCP_MD5SIG
/* Look up the MD5 key configured for peer @addr on @sk (AF_INET6). */
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}
554
/* tcp_sock_af_ops hook: key lookup keyed by @addr_sk's peer address. */
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}
560
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the user
 * supplied struct tcp_md5sig and add or (for zero keylen) delete the
 * key.  v4-mapped peers are stored under AF_INET with the embedded
 * IPv4 address.  Returns 0 or a negative errno.
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	/* TCP_MD5SIG_EXT may scope the key to an address prefix; cap the
	 * prefix at the address family's width (32 for v4-mapped).
	 */
	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	/* Zero key length means "delete this key". */
	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}
607
/* Feed the TCP-MD5 pseudo-header plus the TCP header (checksum zeroed,
 * per RFC 2385) into the hash in progress.  The pool's scratch buffer
 * is laid out as [tcp6_pseudohdr][tcphdr] and hashed in one pass.
 * Returns the crypto_ahash_update() result (0 on success).
 */
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	/* Copy the TCP header right after the pseudo-header and zero the
	 * checksum field, which is excluded from the signature.
	 */
	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}
633
/* Compute the MD5 signature over headers only (no payload) — used for
 * RST/ACK packets built in tcp_v6_send_response().  Writes 16 bytes to
 * @md5_hash.  Returns 0 on success; on failure returns 1 with the
 * output zeroed.
 */
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	/* th->doff << 2 = header length in bytes (incl. options). */
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
665
/* Compute the MD5 signature over an skb's headers and payload.  The
 * address pair comes from @sk when given (established/request socket),
 * otherwise from the packet itself.  Writes 16 bytes to @md5_hash;
 * returns 0 on success, 1 (with zeroed output) on failure.
 */
static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	/* Pseudo-header + TCP header, then the segment data past the
	 * header (offset th->doff << 2), then the key itself.
	 */
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
712
713 #endif
714
/* Validate the MD5 option of an inbound segment against the key (if
 * any) configured for the peer.  Returns true when the packet must be
 * dropped: option present without a key, key without an option, or a
 * signature mismatch.  Always false when CONFIG_TCP_MD5SIG is off.
 */
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}
759
/* Initialize the IPv6 portion of a request sock from the incoming SYN:
 * record the address pair, pin the ingress interface for link-local
 * peers, and stash the skb when the listener wants packet options
 * (rxinfo/hoplimit/flow label reflection) delivered later.
 */
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	/* Keep a reference to the SYN skb (not for TIME_WAIT recycles)
	 * if any rx-options are requested; consumed when the child
	 * socket is created.
	 */
	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
785
786 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
787                                           struct flowi *fl,
788                                           const struct request_sock *req)
789 {
790         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
791 }
792
/* Generic request-sock operations for IPv6 TCP (SYN-ACK retransmit,
 * reset/ack transmission, and request teardown).
 */
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};
802
/* TCP-specific request-sock callbacks for IPv6: ISN/ts-offset
 * generation, routing, syncookies, MD5 hooks and SYN-ACK emission.
 * mss_clamp reserves room for the IPv6 + TCP headers within the
 * minimum IPv6 MTU.
 */
const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};
819
/* Build and transmit a bare ACK or RST in reply to @skb, using the
 * per-netns IPv6 control socket rather than any connection state.
 * The reply swaps the address/port pair found in @skb and optionally
 * carries timestamp and MD5 options.
 *
 * @sk may be NULL (e.g. RST for a packet matching no socket), a full
 * socket, or a timewait socket; it only supplies mark/tstamp/uid hints.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	/* Grow the TCP header for any options we will emit below */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	/* Atomic allocation: we may be called from softirq context */
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	/* a RST answering an ACK carries no ACK of its own */
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		/* sign with the incoming packet's address pair (our reply
		 * swaps them, so saddr here is the reply's daddr)
		 */
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	/* Route the reply back to the sender of @skb */
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	/* link-local destinations need an explicit output interface */
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	/* no route for the reply: drop it silently */
	kfree_skb(buff);
}
937
/* Send a RST in reply to @skb (RFC 793 reset generation).  @sk may be
 * NULL, a request/timewait socket, or a full socket; it supplies only
 * the bound device, priority, flow label and (with MD5) signing key.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	/* never answer a RST with a RST (RFC 793) */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	/* rcu_read_lock() protects the MD5 key until the response is built;
	 * the goto-out paths below must pair with rcu_read_unlock().
	 */
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source),
					   tcp_v6_iif_l3_slave(skb),
					   tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	/* RFC 793: choose SEQ/ACK so the peer will accept the reset */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		/* no socket: optionally reflect the incoming flow label */
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
1031
1032 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1033                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1034                             struct tcp_md5sig_key *key, u8 tclass,
1035                             __be32 label, u32 priority)
1036 {
1037         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1038                              tclass, label, priority);
1039 }
1040
/* Answer a segment arriving for a TIME_WAIT socket by re-asserting the
 * connection's last snd_nxt/rcv_nxt, with the timewait socket's window,
 * timestamps, flow label, tclass and priority.  Drops the timewait
 * reference taken by the caller.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}
1054
/* ACK a segment on behalf of a request socket, before a full child
 * socket exists (SYN_RECV / Fast Open handshake completion).
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}
1075
1076
/* For a non-SYN segment reaching a listener, try to validate it as a
 * SYN-cookie ACK.  Returns the new child socket, @sk itself, or NULL
 * if the cookie is invalid.  Pass-through without CONFIG_SYN_COOKIES.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
1087
/* Generate a SYN cookie for stackless SYN handling (e.g. a BPF
 * synproxy answering SYNs without creating request sockets).
 * Returns the clamped MSS on success (cookie stored via @cookie),
 * or 0 when cookies cannot be used.  Marks the listener's SYN queue
 * as overflowed so later ACKs are checked against cookies.
 */
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}
1102
/* Handle an incoming SYN on a listening socket.  IPv4 packets on a
 * dual-stack listener are redirected to the IPv4 handler; non-unicast
 * destinations and v4-mapped source addresses are dropped without RST.
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/* a v4-mapped source address is never valid on the wire */
	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
1123
/* Undo tcp_v6_fill_cb(): put the saved IPv6 control-block data back
 * where generic IPv6 code expects to find it.
 */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
1133
/* Create the child socket completing a passive open (third step of the
 * handshake, or syncookie validation).  Handles both native IPv6 and
 * v4-mapped connections on a dual-stack listener.  Returns the new
 * socket with *own_req telling the caller whether we won the race to
 * insert it into the ehash table, or NULL on failure.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		/* let the IPv4 code build the child, then convert it to a
		 * v4-mapped IPv6 socket below
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		/* route all further processing through the v4-mapped ops */
		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		/* reset IPv6-only state copied from the listener above */
		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt         = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	/* addresses/iif come from the request socket, not the listener */
	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	/* placeholder IPv4 addresses for an IPv6-only socket */
	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
1349
1350 /* The socket must have it's spinlock held when we get
1351  * here, unless it is a TCP_LISTEN socket.
1352  *
1353  * We have a potential double-lock case here, so even when
1354  * doing backlog processing we use the BH locking scheme.
1355  * This is because we cannot sleep with the original spinlock
1356  * held.
1357  */
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
/* Backlog/receive handler for an IPv6 TCP socket.  Dispatches between
 * the established fast path, listener processing and the generic state
 * machine, and latches IPV6_PKTOPTIONS ancillary data for the user.
 * Returns 0 on success, nonzero if a reset was sent.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* drop the cached rx dst if it went stale */
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			/* latch the clone; old pktoptions freed below */
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
1485
/* Populate TCP_SKB_CB() from the TCP/IPv6 headers of @skb.  The TCP
 * control block overlays (part of) IP6CB(), so this must run after the
 * last user of IP6CB and be undone with tcp_v6_restore_cb() if the
 * IPv6 control data is needed again.
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN and FIN as one sequence number each */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
1509
1510 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1511 {
1512         struct sk_buff *skb_to_free;
1513         int sdif = inet6_sdif(skb);
1514         const struct tcphdr *th;
1515         const struct ipv6hdr *hdr;
1516         bool refcounted;
1517         struct sock *sk;
1518         int ret;
1519         struct net *net = dev_net(skb->dev);
1520
1521         if (skb->pkt_type != PACKET_HOST)
1522                 goto discard_it;
1523
1524         /*
1525          *      Count it even if it's bad.
1526          */
1527         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1528
1529         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1530                 goto discard_it;
1531
1532         th = (const struct tcphdr *)skb->data;
1533
1534         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1535                 goto bad_packet;
1536         if (!pskb_may_pull(skb, th->doff*4))
1537                 goto discard_it;
1538
1539         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1540                 goto csum_error;
1541
1542         th = (const struct tcphdr *)skb->data;
1543         hdr = ipv6_hdr(skb);
1544
1545 lookup:
1546         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1547                                 th->source, th->dest, inet6_iif(skb), sdif,
1548                                 &refcounted);
1549         if (!sk)
1550                 goto no_tcp_socket;
1551
1552 process:
1553         if (sk->sk_state == TCP_TIME_WAIT)
1554                 goto do_time_wait;
1555
1556         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1557                 struct request_sock *req = inet_reqsk(sk);
1558                 bool req_stolen = false;
1559                 struct sock *nsk;
1560
1561                 sk = req->rsk_listener;
1562                 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1563                         sk_drops_add(sk, skb);
1564                         reqsk_put(req);
1565                         goto discard_it;
1566                 }
1567                 if (tcp_checksum_complete(skb)) {
1568                         reqsk_put(req);
1569                         goto csum_error;
1570                 }
1571                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1572                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1573                         goto lookup;
1574                 }
1575                 sock_hold(sk);
1576                 refcounted = true;
1577                 nsk = NULL;
1578                 if (!tcp_filter(sk, skb)) {
1579                         th = (const struct tcphdr *)skb->data;
1580                         hdr = ipv6_hdr(skb);
1581                         tcp_v6_fill_cb(skb, hdr, th);
1582                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1583                 }
1584                 if (!nsk) {
1585                         reqsk_put(req);
1586                         if (req_stolen) {
1587                                 /* Another cpu got exclusive access to req
1588                                  * and created a full blown socket.
1589                                  * Try to feed this packet to this socket
1590                                  * instead of discarding it.
1591                                  */
1592                                 tcp_v6_restore_cb(skb);
1593                                 sock_put(sk);
1594                                 goto lookup;
1595                         }
1596                         goto discard_and_relse;
1597                 }
1598                 if (nsk == sk) {
1599                         reqsk_put(req);
1600                         tcp_v6_restore_cb(skb);
1601                 } else if (tcp_child_process(sk, nsk, skb)) {
1602                         tcp_v6_send_reset(nsk, skb);
1603                         goto discard_and_relse;
1604                 } else {
1605                         sock_put(sk);
1606                         return 0;
1607                 }
1608         }
1609         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1610                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1611                 goto discard_and_relse;
1612         }
1613
1614         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1615                 goto discard_and_relse;
1616
1617         if (tcp_v6_inbound_md5_hash(sk, skb))
1618                 goto discard_and_relse;
1619
1620         if (tcp_filter(sk, skb))
1621                 goto discard_and_relse;
1622         th = (const struct tcphdr *)skb->data;
1623         hdr = ipv6_hdr(skb);
1624         tcp_v6_fill_cb(skb, hdr, th);
1625
1626         skb->dev = NULL;
1627
1628         if (sk->sk_state == TCP_LISTEN) {
1629                 ret = tcp_v6_do_rcv(sk, skb);
1630                 goto put_and_return;
1631         }
1632
1633         sk_incoming_cpu_update(sk);
1634
1635         bh_lock_sock_nested(sk);
1636         tcp_segs_in(tcp_sk(sk), skb);
1637         ret = 0;
1638         if (!sock_owned_by_user(sk)) {
1639                 skb_to_free = sk->sk_rx_skb_cache;
1640                 sk->sk_rx_skb_cache = NULL;
1641                 ret = tcp_v6_do_rcv(sk, skb);
1642         } else {
1643                 if (tcp_add_backlog(sk, skb))
1644                         goto discard_and_relse;
1645                 skb_to_free = NULL;
1646         }
1647         bh_unlock_sock(sk);
1648         if (skb_to_free)
1649                 __kfree_skb(skb_to_free);
1650 put_and_return:
1651         if (refcounted)
1652                 sock_put(sk);
1653         return ret ? -1 : 0;
1654
1655 no_tcp_socket:
1656         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1657                 goto discard_it;
1658
1659         tcp_v6_fill_cb(skb, hdr, th);
1660
1661         if (tcp_checksum_complete(skb)) {
1662 csum_error:
1663                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1664 bad_packet:
1665                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1666         } else {
1667                 tcp_v6_send_reset(NULL, skb);
1668         }
1669
1670 discard_it:
1671         kfree_skb(skb);
1672         return 0;
1673
1674 discard_and_relse:
1675         sk_drops_add(sk, skb);
1676         if (refcounted)
1677                 sock_put(sk);
1678         goto discard_it;
1679
1680 do_time_wait:
1681         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1682                 inet_twsk_put(inet_twsk(sk));
1683                 goto discard_it;
1684         }
1685
1686         tcp_v6_fill_cb(skb, hdr, th);
1687
1688         if (tcp_checksum_complete(skb)) {
1689                 inet_twsk_put(inet_twsk(sk));
1690                 goto csum_error;
1691         }
1692
1693         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1694         case TCP_TW_SYN:
1695         {
1696                 struct sock *sk2;
1697
1698                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1699                                             skb, __tcp_hdrlen(th),
1700                                             &ipv6_hdr(skb)->saddr, th->source,
1701                                             &ipv6_hdr(skb)->daddr,
1702                                             ntohs(th->dest),
1703                                             tcp_v6_iif_l3_slave(skb),
1704                                             sdif);
1705                 if (sk2) {
1706                         struct inet_timewait_sock *tw = inet_twsk(sk);
1707                         inet_twsk_deschedule_put(tw);
1708                         sk = sk2;
1709                         tcp_v6_restore_cb(skb);
1710                         refcounted = false;
1711                         goto process;
1712                 }
1713         }
1714                 /* to ACK */
1715                 /* fall through */
1716         case TCP_TW_ACK:
1717                 tcp_v6_timewait_ack(sk, skb);
1718                 break;
1719         case TCP_TW_RST:
1720                 tcp_v6_send_reset(sk, skb);
1721                 inet_twsk_deschedule_put(inet_twsk(sk));
1722                 goto discard_it;
1723         case TCP_TW_SUCCESS:
1724                 ;
1725         }
1726         goto discard_it;
1727 }
1728
/* Early demultiplex: before the full IPv6 receive path runs, try to match
 * this TCP segment to an established socket so the packet can carry the
 * socket reference and reuse the socket's cached receive route.
 */
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	/* Only packets addressed to this host are eligible. */
	if (skb->pkt_type != PACKET_HOST)
		return;

	/* Make sure the fixed-size TCP header is in the linear area. */
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	/* Reject segments advertising an impossibly small data offset. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		/* Attach the socket; sock_edemux releases the lookup ref. */
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			/* Revalidate the cached dst before attaching it, and
			 * only use it when the packet arrived on the same
			 * interface the dst was cached for.
			 */
			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
1766
/* TIME_WAIT socket glue for TCPv6: object size plus the shared TCP
 * uniqueness check and destructor.
 */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
1772
/* Address-family specific connection-socket operations used by TCP
 * sockets carrying native IPv6 traffic.
 */
static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
1792
#ifdef CONFIG_TCP_MD5SIG
/* TCP MD5 signature option helpers (key lookup, hash computation,
 * setsockopt parsing) for native IPv6 sockets.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1800
1801 /*
1802  *      TCP over IPv4 via INET6 API
1803  */
1804 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1805         .queue_xmit        = ip_queue_xmit,
1806         .send_check        = tcp_v4_send_check,
1807         .rebuild_header    = inet_sk_rebuild_header,
1808         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1809         .conn_request      = tcp_v6_conn_request,
1810         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1811         .net_header_len    = sizeof(struct iphdr),
1812         .setsockopt        = ipv6_setsockopt,
1813         .getsockopt        = ipv6_getsockopt,
1814         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1815         .sockaddr_len      = sizeof(struct sockaddr_in6),
1816 #ifdef CONFIG_COMPAT
1817         .compat_setsockopt = compat_ipv6_setsockopt,
1818         .compat_getsockopt = compat_ipv6_getsockopt,
1819 #endif
1820         .mtu_reduced       = tcp_v4_mtu_reduced,
1821 };
1822
#ifdef CONFIG_TCP_MD5SIG
/* MD5 option helpers for IPv4-mapped sockets: hashing follows the IPv4
 * wire format, but key parsing still goes through the IPv6 entry point.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1830
1831 /* NOTE: A lot of things set to zero explicitly by call to
1832  *       sk_alloc() so need not be done here.
1833  */
1834 static int tcp_v6_init_sock(struct sock *sk)
1835 {
1836         struct inet_connection_sock *icsk = inet_csk(sk);
1837
1838         tcp_init_sock(sk);
1839
1840         icsk->icsk_af_ops = &ipv6_specific;
1841
1842 #ifdef CONFIG_TCP_MD5SIG
1843         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1844 #endif
1845
1846         return 0;
1847 }
1848
/* Tear down a TCPv6 socket: run the shared TCP/IPv4 destructor first,
 * then release the IPv6-specific socket state.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
1854
1855 #ifdef CONFIG_PROC_FS
1856 /* Proc filesystem TCPv6 sock list dumping. */
1857 static void get_openreq6(struct seq_file *seq,
1858                          const struct request_sock *req, int i)
1859 {
1860         long ttd = req->rsk_timer.expires - jiffies;
1861         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1862         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1863
1864         if (ttd < 0)
1865                 ttd = 0;
1866
1867         seq_printf(seq,
1868                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1869                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1870                    i,
1871                    src->s6_addr32[0], src->s6_addr32[1],
1872                    src->s6_addr32[2], src->s6_addr32[3],
1873                    inet_rsk(req)->ir_num,
1874                    dest->s6_addr32[0], dest->s6_addr32[1],
1875                    dest->s6_addr32[2], dest->s6_addr32[3],
1876                    ntohs(inet_rsk(req)->ir_rmt_port),
1877                    TCP_SYN_RECV,
1878                    0, 0, /* could print option size, but that is af dependent. */
1879                    1,   /* timers active (only the expire timer) */
1880                    jiffies_to_clock_t(ttd),
1881                    req->num_timeout,
1882                    from_kuid_munged(seq_user_ns(seq),
1883                                     sock_i_uid(req->rsk_listener)),
1884                    0,  /* non standard timer */
1885                    0, /* open_requests have no inode */
1886                    0, req);
1887 }
1888
/* Emit one /proc/net/tcp6 line for a full TCP socket.
 *
 * The socket is NOT locked here; all reads are best-effort snapshots,
 * hence the READ_ONCE() annotations and the clamping of transient
 * negative queue values below.
 */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	/* Map the pending ICSK timer onto the /proc "tr" code:
	 * 1 = retransmit-class timers, 4 = zero window probe,
	 * 2 = sk_timer, 0 = none pending.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		/* For listeners, "rx_queue" reports the accept backlog. */
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   /* last column: fastopen queue limit for listeners,
		    * otherwise ssthresh (-1 while still in initial slow start).
		    */
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
1960
1961 static void get_timewait6_sock(struct seq_file *seq,
1962                                struct inet_timewait_sock *tw, int i)
1963 {
1964         long delta = tw->tw_timer.expires - jiffies;
1965         const struct in6_addr *dest, *src;
1966         __u16 destp, srcp;
1967
1968         dest = &tw->tw_v6_daddr;
1969         src  = &tw->tw_v6_rcv_saddr;
1970         destp = ntohs(tw->tw_dport);
1971         srcp  = ntohs(tw->tw_sport);
1972
1973         seq_printf(seq,
1974                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1975                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1976                    i,
1977                    src->s6_addr32[0], src->s6_addr32[1],
1978                    src->s6_addr32[2], src->s6_addr32[3], srcp,
1979                    dest->s6_addr32[0], dest->s6_addr32[1],
1980                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
1981                    tw->tw_substate, 0, 0,
1982                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1983                    refcount_read(&tw->tw_refcnt), tw);
1984 }
1985
1986 static int tcp6_seq_show(struct seq_file *seq, void *v)
1987 {
1988         struct tcp_iter_state *st;
1989         struct sock *sk = v;
1990
1991         if (v == SEQ_START_TOKEN) {
1992                 seq_puts(seq,
1993                          "  sl  "
1994                          "local_address                         "
1995                          "remote_address                        "
1996                          "st tx_queue rx_queue tr tm->when retrnsmt"
1997                          "   uid  timeout inode\n");
1998                 goto out;
1999         }
2000         st = seq->private;
2001
2002         if (sk->sk_state == TCP_TIME_WAIT)
2003                 get_timewait6_sock(seq, v, st->num);
2004         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2005                 get_openreq6(seq, v, st->num);
2006         else
2007                 get_tcp6_sock(seq, v, st->num);
2008 out:
2009         return 0;
2010 }
2011
/* seq_file iterator for /proc/net/tcp6; iteration is shared with the
 * IPv4 code, only .show is IPv6-specific.
 */
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
2018
/* Address-family tag passed to the shared TCP seq_file iterator. */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2022
2023 int __net_init tcp6_proc_init(struct net *net)
2024 {
2025         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2026                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2027                 return -ENOMEM;
2028         return 0;
2029 }
2030
/* Remove the per-namespace /proc/net/tcp6 entry. */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2035 #endif
2036
/* TCPv6 protocol descriptor: binds the generic socket-layer operations
 * (mostly shared with IPv4 TCP) to the IPv6-specific connect, init,
 * destroy and receive paths, plus the tcp6 slab/timewait/request types.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
2083
/* IPv6 transport protocol hooks for TCP (receive, ICMPv6 error handling,
 * early demux).
 *
 * thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =	tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
2094
/* socket() switch entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to
 * tcpv6_prot and the generic inet6 stream ops.
 */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
2103
/* Per-netns init: create the kernel TCPv6 control socket
 * (net->ipv6.tcp_sk) — presumably used for stack-generated segments
 * such as resets; the users are outside this file.
 */
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}
2109
/* Per-netns exit: destroy the control socket created in tcpv6_net_init(). */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2114
/* Batched netns exit: purge lingering IPv6 TIME_WAIT sockets from the
 * global hash once per batch of dying namespaces.
 */
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}
2119
/* Per-network-namespace lifecycle hooks for the TCPv6 stack. */
static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
2125
2126 int __init tcpv6_init(void)
2127 {
2128         int ret;
2129
2130         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2131         if (ret)
2132                 goto out;
2133
2134         /* register inet6 protocol */
2135         ret = inet6_register_protosw(&tcpv6_protosw);
2136         if (ret)
2137                 goto out_tcpv6_protocol;
2138
2139         ret = register_pernet_subsys(&tcpv6_net_ops);
2140         if (ret)
2141                 goto out_tcpv6_protosw;
2142 out:
2143         return ret;
2144
2145 out_tcpv6_protosw:
2146         inet6_unregister_protosw(&tcpv6_protosw);
2147 out_tcpv6_protocol:
2148         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2149         goto out;
2150 }
2151
/* Module unload: unregister everything tcpv6_init() set up, in reverse
 * order of registration.
 */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
2157 }