GNU Linux-libre 5.10.153-gnu1
net/ipv6/tcp_ipv6.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      to a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

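/* Cache the validated RX dst entry on the socket, together with the
 * ingress ifindex and a route cookie, so that the established fast path
 * can skip a full route lookup for subsequent packets.
 */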
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
                tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
        }
}

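/* Derive the initial sequence number and the timestamp offset for a new
 * connection from the addresses and ports of the incoming SYN, using the
 * keyed hashes provided by net/core/secure_seq.c.
 */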
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside of the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

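/* Connect the socket to a remote IPv6 (or v4-mapped) address: validate the
 * destination, route the flow, pick a source address, and start the
 * three-way handshake via tcp_connect(). V4-mapped destinations are handed
 * over to tcp_v4_connect() with the af_ops switched to the mapped variants.
 */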
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the
                         * indices must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

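/* Called once an ICMPV6_PKT_TOOBIG notification has been recorded in
 * tp->mtu_info: update the cached path MTU and, if our mss shrinks as a
 * result, retransmit the outstanding data in smaller segments.
 */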
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * The check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

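/* ICMPv6 error handler for TCP. Looks up the socket the quoted TCP header
 * belongs to and dispatches on the error type: redirects update the cached
 * route, PKT_TOOBIG triggers PMTU reduction, and hard errors are reported
 * to the socket (or recorded in sk_err_soft while the user holds the lock).
 */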
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows a revert of the
                 * RTO backoff (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

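/* Build and transmit a SYN-ACK for the given request sock: grab a route if
 * the caller did not supply one, fill in the checksum, pick the traffic
 * class (optionally reflecting the SYN's TOS), and send via ip6_xmit().
 */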
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

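/* setsockopt(TCP_MD5SIG/TCP_MD5SIG_EXT) handler: validate the user-supplied
 * tcp_md5sig command, resolve the optional prefix length and L3 master
 * ifindex, and add or delete the key. V4-mapped addresses are stored as
 * AF_INET keys.
 */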
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* It is OK to reference dev set/not set outside of RCU;
                 * right now the device MUST be an L3 master.
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

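/* Feed the IPv6 pseudo-header and the TCP header (with its checksum field
 * zeroed) into the MD5 hash in progress.
 */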
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);
                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

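/* Verify the TCP-MD5 signature of an incoming segment against the key
 * configured for the peer, if any. Returns true when the segment must be
 * dropped: a missing or unexpected option, or a signature mismatch.
 */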
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set means the packet ingressed via a device in an L3
         * domain, and dif is set to the l3mdev.
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct flowi *fl,
                                          const struct request_sock *req)
{
        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
        .init_req       =       tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

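/* Build and send a bare ACK or RST on the per-netns control socket,
 * mirroring the addresses of the packet being replied to. Optional
 * timestamps and an MD5 signature are appended as TCP options.
 */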
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        struct dst_entry *dst;
        __be32 *topt;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup even if it is for an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

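/* Send an RST in response to skb. When no socket is known and the segment
 * carries an MD5 option, try to find the key via a listener lookup so the
 * RST itself can be signed (and suppressed on a signature mismatch).
 */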
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain, and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is lost. Try to find the listening socket
                 * through the source port, and then find the MD5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the MD5 hash of the
                 * found key, and no RST is generated if the hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain, and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

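/* During a SYN-flood induced cookie exchange, validate the syncookie
 * carried by an incoming ACK and return the freshly created child socket;
 * plain SYNs pass through untouched.
 */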
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

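/* Entry point for an incoming SYN on a listening socket: reject non-unicast
 * destinations and v4-mapped sources, then hand over to the generic
 * tcp_conn_request() with the IPv6 request_sock ops.
 */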
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if
         * xfrm6_policy_check() and tcp_v6_fill_cb() are going to be called
         * again. ip6_datagram_recv_specific_ctl() also expects IP6CB to be
         * there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

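/* Create the child socket once the three-way handshake completes. SYNs that
 * arrived as IPv4 yield a v6-mapped child driven by the mapped af_ops; for
 * native IPv6 the child inherits options, routes, tclass and, if present,
 * the listener's MD5 key.
 */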
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newinet = inet_sk(newsk);
                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp
                   worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set the ToS of the new socket based upon the value of the incoming
         * SYN. ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but we
           do one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone(ireq->pktopts,
                                                      sk_gfp_mask(sk, GFP_ATOMIC));
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions) {
                                tcp_v6_restore_cb(newnp->pktoptions);
                                skb_set_owner_r(newnp->pktoptions, newsk);
                        }
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

1438 /* The socket must have it's spinlock held when we get
1439  * here, unless it is a TCP_LISTEN socket.
1440  *
1441  * We have a potential double-lock case here, so even when
1442  * doing backlog processing we use the BH locking scheme.
1443  * This is because we cannot sleep with the original spinlock
1444  * held.
1445  */
1446 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1447 {
1448         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1449         struct sk_buff *opt_skb = NULL;
1450         struct tcp_sock *tp;
1451
1452         /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
1453            goes to the IPv4 receive handler and is backlogged.
1454            From the backlog it always ends up here. Kerboom...
1455            Fortunately, tcp_rcv_established and rcv_established
1456            handle it correctly, but that is not the case with
1457            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1458          */
1459
1460         if (skb->protocol == htons(ETH_P_IP))
1461                 return tcp_v4_do_rcv(sk, skb);
1462
1463         /*
1464          *      Socket locking is here for SMP purposes, as backlog rcv
1465          *      is currently called with BH processing disabled.
1466          */
1467
1468         /* Do Stevens' IPV6_PKTOPTIONS.
1469
1470            Yes, guys, this is the only place in our code where we
1471            can implement it without affecting IPv4.
1472            The rest of the code is protocol independent,
1473            and I do not like the idea of uglifying IPv4.
1474
1475            Actually, the whole idea behind IPV6_PKTOPTIONS
1476            does not look very well thought out. For now we latch
1477            the options received in the last packet enqueued
1478            by TCP. Feel free to propose a better solution.
1479                                                --ANK (980728)
1480          */
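             /* If the user enabled any IPV6_PKTOPTIONS-style ancillary
              * data, clone the skb so the received options can be
              * latched below.
              */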
1481         if (np->rxopt.all)
1482                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1483
1484         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1485                 struct dst_entry *dst;
1486
1487                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1488                                                 lockdep_sock_is_held(sk));
1489
1490                 sock_rps_save_rxhash(sk, skb);
1491                 sk_mark_napi_id(sk, skb);
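                     /* Invalidate the cached RX route if the packet arrived
                      * on a different interface or the dst fails its cookie
                      * recheck.
                      */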
1492                 if (dst) {
1493                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1494                             dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1495                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1496                                 dst_release(dst);
1497                         }
1498                 }
1499
1500                 tcp_rcv_established(sk, skb);
1501                 if (opt_skb)
1502                         goto ipv6_pktoptions;
1503                 return 0;
1504         }
1505
1506         if (tcp_checksum_complete(skb))
1507                 goto csum_err;
1508
1509         if (sk->sk_state == TCP_LISTEN) {
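                     /* tcp_v6_cookie_check() normally returns sk itself; for
                      * an ACK that matches no request it may validate a
                      * syncookie and return a freshly created child socket,
                      * or NULL to drop the segment.
                      */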
1510                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1511
1512                 if (!nsk)
1513                         goto discard;
1514
1515                 if (nsk != sk) {
1516                         if (tcp_child_process(sk, nsk, skb))
1517                                 goto reset;
1518                         if (opt_skb)
1519                                 __kfree_skb(opt_skb);
1520                         return 0;
1521                 }
1522         } else
1523                 sock_rps_save_rxhash(sk, skb);
1524
1525         if (tcp_rcv_state_process(sk, skb))
1526                 goto reset;
1527         if (opt_skb)
1528                 goto ipv6_pktoptions;
1529         return 0;
1530
1531 reset:
1532         tcp_v6_send_reset(sk, skb);
1533 discard:
1534         if (opt_skb)
1535                 __kfree_skb(opt_skb);
1536         kfree_skb(skb);
1537         return 0;
1538 csum_err:
1539         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1540         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1541         goto discard;
1542
1544 ipv6_pktoptions:
1545         /* What is going on here?  The options are latched only if:
1546
1547            1. the skb was enqueued by TCP;
1548            2. the skb was added to the tail of the read queue, not out of order;
1549            3. the socket is not in a passive state;
1550            4. it really contains options the user wants to receive.
1551          */
1552         tp = tcp_sk(sk);
1553         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1554             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1555                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1556                         np->mcast_oif = tcp_v6_iif(opt_skb);
1557                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1558                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1559                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1560                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1561                 if (np->repflow)
1562                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1563                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1564                         skb_set_owner_r(opt_skb, sk);
1565                         tcp_v6_restore_cb(opt_skb);
1566                         opt_skb = xchg(&np->pktoptions, opt_skb);
1567                 } else {
1568                         __kfree_skb(opt_skb);
1569                         opt_skb = xchg(&np->pktoptions, NULL);
1570                 }
1571         }
1572
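             /* opt_skb is now either the previously latched options returned
              * by xchg() or an unused clone; kfree_skb() copes with NULL.
              */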
1573         kfree_skb(opt_skb);
1574         return 0;
1575 }
1576
1577 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1578                            const struct tcphdr *th)
1579 {
1580         /* This is tricky: we move IP6CB to its correct location inside
1581          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1582          * _decode_session6() uses IP6CB().
1583          * barrier() makes sure the compiler won't play aliasing games.
1584          */
1585         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1586                 sizeof(struct inet6_skb_parm));
1587         barrier();
1588
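             /* end_seq accounts for the payload plus one sequence unit each
              * for the SYN and FIN flags.
              */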
1589         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1590         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1591                                     skb->len - th->doff*4);
1592         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1593         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1594         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1595         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1596         TCP_SKB_CB(skb)->sacked = 0;
1597         TCP_SKB_CB(skb)->has_rxtstamp =
1598                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1599 }
1600
1601 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1602 {
1603         struct sk_buff *skb_to_free;
1604         int sdif = inet6_sdif(skb);
1605         int dif = inet6_iif(skb);
1606         const struct tcphdr *th;
1607         const struct ipv6hdr *hdr;
1608         bool refcounted;
1609         struct sock *sk;
1610         int ret;
1611         struct net *net = dev_net(skb->dev);
1612
1613         if (skb->pkt_type != PACKET_HOST)
1614                 goto discard_it;
1615
1616         /*
1617          *      Count it even if it's bad.
1618          */
1619         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1620
1621         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1622                 goto discard_it;
1623
1624         th = (const struct tcphdr *)skb->data;
1625
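             /* doff counts 32-bit words; anything below the 5-word minimum
              * header is malformed.
              */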
1626         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1627                 goto bad_packet;
1628         if (!pskb_may_pull(skb, th->doff*4))
1629                 goto discard_it;
1630
1631         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1632                 goto csum_error;
1633
1634         th = (const struct tcphdr *)skb->data;
1635         hdr = ipv6_hdr(skb);
1636
1637 lookup:
1638         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1639                                 th->source, th->dest, inet6_iif(skb), sdif,
1640                                 &refcounted);
1641         if (!sk)
1642                 goto no_tcp_socket;
1643
1644 process:
1645         if (sk->sk_state == TCP_TIME_WAIT)
1646                 goto do_time_wait;
1647
1648         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1649                 struct request_sock *req = inet_reqsk(sk);
1650                 bool req_stolen = false;
1651                 struct sock *nsk;
1652
1653                 sk = req->rsk_listener;
1654                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1655                         sk_drops_add(sk, skb);
1656                         reqsk_put(req);
1657                         goto discard_it;
1658                 }
1659                 if (tcp_checksum_complete(skb)) {
1660                         reqsk_put(req);
1661                         goto csum_error;
1662                 }
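                     /* The listener may have been closed or changed state
                      * since the lookup; drop the request and look up again.
                      */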
1663                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1664                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1665                         goto lookup;
1666                 }
1667                 sock_hold(sk);
1668                 refcounted = true;
1669                 nsk = NULL;
1670                 if (!tcp_filter(sk, skb)) {
1671                         th = (const struct tcphdr *)skb->data;
1672                         hdr = ipv6_hdr(skb);
1673                         tcp_v6_fill_cb(skb, hdr, th);
1674                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1675                 }
1676                 if (!nsk) {
1677                         reqsk_put(req);
1678                         if (req_stolen) {
1679                                 /* Another CPU got exclusive access to req
1680                                  * and created a full-blown socket.
1681                                  * Try to feed this packet to that socket
1682                                  * instead of discarding it.
1683                                  */
1684                                 tcp_v6_restore_cb(skb);
1685                                 sock_put(sk);
1686                                 goto lookup;
1687                         }
1688                         goto discard_and_relse;
1689                 }
1690                 if (nsk == sk) {
1691                         reqsk_put(req);
1692                         tcp_v6_restore_cb(skb);
1693                 } else if (tcp_child_process(sk, nsk, skb)) {
1694                         tcp_v6_send_reset(nsk, skb);
1695                         goto discard_and_relse;
1696                 } else {
1697                         sock_put(sk);
1698                         return 0;
1699                 }
1700         }
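             /* Enforce the IPV6_MINHOPCOUNT floor (RFC 5082-style generalized
              * TTL security) before doing any further work on this segment.
              */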
1701         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1702                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1703                 goto discard_and_relse;
1704         }
1705
1706         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1707                 goto discard_and_relse;
1708
1709         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1710                 goto discard_and_relse;
1711
1712         if (tcp_filter(sk, skb))
1713                 goto discard_and_relse;
1714         th = (const struct tcphdr *)skb->data;
1715         hdr = ipv6_hdr(skb);
1716         tcp_v6_fill_cb(skb, hdr, th);
1717
1718         skb->dev = NULL;
1719
1720         if (sk->sk_state == TCP_LISTEN) {
1721                 ret = tcp_v6_do_rcv(sk, skb);
1722                 goto put_and_return;
1723         }
1724
1725         sk_incoming_cpu_update(sk);
1726
1727         bh_lock_sock_nested(sk);
1728         tcp_segs_in(tcp_sk(sk), skb);
1729         ret = 0;
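             /* If the socket is not owned by user context, process the
              * segment now and steal the one-skb receive cache so it can be
              * freed after the lock is released; otherwise queue the segment
              * onto the backlog.
              */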
1730         if (!sock_owned_by_user(sk)) {
1731                 skb_to_free = sk->sk_rx_skb_cache;
1732                 sk->sk_rx_skb_cache = NULL;
1733                 ret = tcp_v6_do_rcv(sk, skb);
1734         } else {
1735                 if (tcp_add_backlog(sk, skb))
1736                         goto discard_and_relse;
1737                 skb_to_free = NULL;
1738         }
1739         bh_unlock_sock(sk);
1740         if (skb_to_free)
1741                 __kfree_skb(skb_to_free);
1742 put_and_return:
1743         if (refcounted)
1744                 sock_put(sk);
1745         return ret ? -1 : 0;
1746
1747 no_tcp_socket:
1748         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1749                 goto discard_it;
1750
1751         tcp_v6_fill_cb(skb, hdr, th);
1752
1753         if (tcp_checksum_complete(skb)) {
1754 csum_error:
1755                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1756 bad_packet:
1757                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1758         } else {
1759                 tcp_v6_send_reset(NULL, skb);
1760         }
1761
1762 discard_it:
1763         kfree_skb(skb);
1764         return 0;
1765
1766 discard_and_relse:
1767         sk_drops_add(sk, skb);
1768         if (refcounted)
1769                 sock_put(sk);
1770         goto discard_it;
1771
1772 do_time_wait:
1773         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1774                 inet_twsk_put(inet_twsk(sk));
1775                 goto discard_it;
1776         }
1777
1778         tcp_v6_fill_cb(skb, hdr, th);
1779
1780         if (tcp_checksum_complete(skb)) {
1781                 inet_twsk_put(inet_twsk(sk));
1782                 goto csum_error;
1783         }
1784
1785         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1786         case TCP_TW_SYN:
1787         {
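                     /* A SYN hit a TIME_WAIT socket. If a matching listener
                      * exists, kill the timewait socket and let the listener
                      * handle the SYN as a fresh connection attempt.
                      */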
1788                 struct sock *sk2;
1789
1790                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1791                                             skb, __tcp_hdrlen(th),
1792                                             &ipv6_hdr(skb)->saddr, th->source,
1793                                             &ipv6_hdr(skb)->daddr,
1794                                             ntohs(th->dest),
1795                                             tcp_v6_iif_l3_slave(skb),
1796                                             sdif);
1797                 if (sk2) {
1798                         struct inet_timewait_sock *tw = inet_twsk(sk);
1799                         inet_twsk_deschedule_put(tw);
1800                         sk = sk2;
1801                         tcp_v6_restore_cb(skb);
1802                         refcounted = false;
1803                         goto process;
1804                 }
1805         }
1806                 /* to ACK */
1807                 fallthrough;
1808         case TCP_TW_ACK:
1809                 tcp_v6_timewait_ack(sk, skb);
1810                 break;
1811         case TCP_TW_RST:
1812                 tcp_v6_send_reset(sk, skb);
1813                 inet_twsk_deschedule_put(inet_twsk(sk));
1814                 goto discard_it;
1815         case TCP_TW_SUCCESS:
1816                 ;
1817         }
1818         goto discard_it;
1819 }
1820
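     /* Opportunistically look up the established socket at IP receive time
      * so the route cached on it (sk_rx_dst) can be attached to the skb,
      * saving a routing lookup on the normal receive path.
      */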
1821 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1822 {
1823         const struct ipv6hdr *hdr;
1824         const struct tcphdr *th;
1825         struct sock *sk;
1826
1827         if (skb->pkt_type != PACKET_HOST)
1828                 return;
1829
1830         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1831                 return;
1832
1833         hdr = ipv6_hdr(skb);
1834         th = tcp_hdr(skb);
1835
1836         if (th->doff < sizeof(struct tcphdr) / 4)
1837                 return;
1838
1839         /* Note: We use inet6_iif() here, not tcp_v6_iif(). */
1840         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1841                                         &hdr->saddr, th->source,
1842                                         &hdr->daddr, ntohs(th->dest),
1843                                         inet6_iif(skb), inet6_sdif(skb));
1844         if (sk) {
1845                 skb->sk = sk;
1846                 skb->destructor = sock_edemux;
1847                 if (sk_fullsock(sk)) {
1848                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1849
1850                         if (dst)
1851                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1852                         if (dst &&
1853                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1854                                 skb_dst_set_noref(skb, dst);
1855                 }
1856         }
1857 }
1858
1859 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1860         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1861         .twsk_unique    = tcp_twsk_unique,
1862         .twsk_destructor = tcp_twsk_destructor,
1863 };
1864
1865 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1866 {
1867         struct ipv6_pinfo *np = inet6_sk(sk);
1868
1869         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1870 }
1871
1872 const struct inet_connection_sock_af_ops ipv6_specific = {
1873         .queue_xmit        = inet6_csk_xmit,
1874         .send_check        = tcp_v6_send_check,
1875         .rebuild_header    = inet6_sk_rebuild_header,
1876         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1877         .conn_request      = tcp_v6_conn_request,
1878         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1879         .net_header_len    = sizeof(struct ipv6hdr),
1880         .net_frag_header_len = sizeof(struct frag_hdr),
1881         .setsockopt        = ipv6_setsockopt,
1882         .getsockopt        = ipv6_getsockopt,
1883         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1884         .sockaddr_len      = sizeof(struct sockaddr_in6),
1885         .mtu_reduced       = tcp_v6_mtu_reduced,
1886 };
1887
1888 #ifdef CONFIG_TCP_MD5SIG
1889 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1890         .md5_lookup     =       tcp_v6_md5_lookup,
1891         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1892         .md5_parse      =       tcp_v6_parse_md5_keys,
1893 };
1894 #endif
1895
1896 /*
1897  *      TCP over IPv4 via INET6 API
1898  */
1899 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1900         .queue_xmit        = ip_queue_xmit,
1901         .send_check        = tcp_v4_send_check,
1902         .rebuild_header    = inet_sk_rebuild_header,
1903         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1904         .conn_request      = tcp_v6_conn_request,
1905         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1906         .net_header_len    = sizeof(struct iphdr),
1907         .setsockopt        = ipv6_setsockopt,
1908         .getsockopt        = ipv6_getsockopt,
1909         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1910         .sockaddr_len      = sizeof(struct sockaddr_in6),
1911         .mtu_reduced       = tcp_v4_mtu_reduced,
1912 };
1913
1914 #ifdef CONFIG_TCP_MD5SIG
1915 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1916         .md5_lookup     =       tcp_v4_md5_lookup,
1917         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1918         .md5_parse      =       tcp_v6_parse_md5_keys,
1919 };
1920 #endif
1921
1922 /* NOTE: A lot of things are set to zero explicitly by the call to
1923  *       sk_alloc(), so they need not be done here.
1924  */
1925 static int tcp_v6_init_sock(struct sock *sk)
1926 {
1927         struct inet_connection_sock *icsk = inet_csk(sk);
1928
1929         tcp_init_sock(sk);
1930
1931         icsk->icsk_af_ops = &ipv6_specific;
1932
1933 #ifdef CONFIG_TCP_MD5SIG
1934         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1935 #endif
1936
1937         return 0;
1938 }
1939
1940 static void tcp_v6_destroy_sock(struct sock *sk)
1941 {
1942         tcp_v4_destroy_sock(sk);
1943         inet6_destroy_sock(sk);
1944 }
1945
1946 #ifdef CONFIG_PROC_FS
1947 /* Proc filesystem TCPv6 sock list dumping. */
1948 static void get_openreq6(struct seq_file *seq,
1949                          const struct request_sock *req, int i)
1950 {
1951         long ttd = req->rsk_timer.expires - jiffies;
1952         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1953         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1954
1955         if (ttd < 0)
1956                 ttd = 0;
1957
1958         seq_printf(seq,
1959                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1960                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1961                    i,
1962                    src->s6_addr32[0], src->s6_addr32[1],
1963                    src->s6_addr32[2], src->s6_addr32[3],
1964                    inet_rsk(req)->ir_num,
1965                    dest->s6_addr32[0], dest->s6_addr32[1],
1966                    dest->s6_addr32[2], dest->s6_addr32[3],
1967                    ntohs(inet_rsk(req)->ir_rmt_port),
1968                    TCP_SYN_RECV,
1969                    0, 0, /* could print option size, but that is af dependent. */
1970                    1,   /* timers active (only the expire timer) */
1971                    jiffies_to_clock_t(ttd),
1972                    req->num_timeout,
1973                    from_kuid_munged(seq_user_ns(seq),
1974                                     sock_i_uid(req->rsk_listener)),
1975                    0,  /* non standard timer */
1976                    0, /* open_requests have no inode */
1977                    0, req);
1978 }
1979
1980 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1981 {
1982         const struct in6_addr *dest, *src;
1983         __u16 destp, srcp;
1984         int timer_active;
1985         unsigned long timer_expires;
1986         const struct inet_sock *inet = inet_sk(sp);
1987         const struct tcp_sock *tp = tcp_sk(sp);
1988         const struct inet_connection_sock *icsk = inet_csk(sp);
1989         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1990         int rx_queue;
1991         int state;
1992
1993         dest  = &sp->sk_v6_daddr;
1994         src   = &sp->sk_v6_rcv_saddr;
1995         destp = ntohs(inet->inet_dport);
1996         srcp  = ntohs(inet->inet_sport);
1997
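             /* timer_active encodes which timer is pending, matching the
              * IPv4 /proc/net/tcp format: 1 retransmit/loss probe,
              * 2 keepalive, 4 zero-window probe, 0 none.
              */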
1998         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1999             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2000             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2001                 timer_active    = 1;
2002                 timer_expires   = icsk->icsk_timeout;
2003         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2004                 timer_active    = 4;
2005                 timer_expires   = icsk->icsk_timeout;
2006         } else if (timer_pending(&sp->sk_timer)) {
2007                 timer_active    = 2;
2008                 timer_expires   = sp->sk_timer.expires;
2009         } else {
2010                 timer_active    = 0;
2011                 timer_expires = jiffies;
2012         }
2013
2014         state = inet_sk_state_load(sp);
2015         if (state == TCP_LISTEN)
2016                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2017         else
2018                 /* Because we don't lock the socket,
2019                  * we might find a transient negative value.
2020                  */
2021                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2022                                       READ_ONCE(tp->copied_seq), 0);
2023
2024         seq_printf(seq,
2025                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2026                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2027                    i,
2028                    src->s6_addr32[0], src->s6_addr32[1],
2029                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2030                    dest->s6_addr32[0], dest->s6_addr32[1],
2031                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2032                    state,
2033                    READ_ONCE(tp->write_seq) - tp->snd_una,
2034                    rx_queue,
2035                    timer_active,
2036                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2037                    icsk->icsk_retransmits,
2038                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2039                    icsk->icsk_probes_out,
2040                    sock_i_ino(sp),
2041                    refcount_read(&sp->sk_refcnt), sp,
2042                    jiffies_to_clock_t(icsk->icsk_rto),
2043                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2044                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2045                    tp->snd_cwnd,
2046                    state == TCP_LISTEN ?
2047                         fastopenq->max_qlen :
2048                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2049                    );
2050 }
2051
2052 static void get_timewait6_sock(struct seq_file *seq,
2053                                struct inet_timewait_sock *tw, int i)
2054 {
2055         long delta = tw->tw_timer.expires - jiffies;
2056         const struct in6_addr *dest, *src;
2057         __u16 destp, srcp;
2058
2059         dest = &tw->tw_v6_daddr;
2060         src  = &tw->tw_v6_rcv_saddr;
2061         destp = ntohs(tw->tw_dport);
2062         srcp  = ntohs(tw->tw_sport);
2063
2064         seq_printf(seq,
2065                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2066                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2067                    i,
2068                    src->s6_addr32[0], src->s6_addr32[1],
2069                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2070                    dest->s6_addr32[0], dest->s6_addr32[1],
2071                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2072                    tw->tw_substate, 0, 0,
2073                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2074                    refcount_read(&tw->tw_refcnt), tw);
2075 }
2076
2077 static int tcp6_seq_show(struct seq_file *seq, void *v)
2078 {
2079         struct tcp_iter_state *st;
2080         struct sock *sk = v;
2081
2082         if (v == SEQ_START_TOKEN) {
2083                 seq_puts(seq,
2084                          "  sl  "
2085                          "local_address                         "
2086                          "remote_address                        "
2087                          "st tx_queue rx_queue tr tm->when retrnsmt"
2088                          "   uid  timeout inode\n");
2089                 goto out;
2090         }
2091         st = seq->private;
2092
2093         if (sk->sk_state == TCP_TIME_WAIT)
2094                 get_timewait6_sock(seq, v, st->num);
2095         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2096                 get_openreq6(seq, v, st->num);
2097         else
2098                 get_tcp6_sock(seq, v, st->num);
2099 out:
2100         return 0;
2101 }
2102
2103 static const struct seq_operations tcp6_seq_ops = {
2104         .show           = tcp6_seq_show,
2105         .start          = tcp_seq_start,
2106         .next           = tcp_seq_next,
2107         .stop           = tcp_seq_stop,
2108 };
2109
2110 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2111         .family         = AF_INET6,
2112 };
2113
2114 int __net_init tcp6_proc_init(struct net *net)
2115 {
2116         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2117                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2118                 return -ENOMEM;
2119         return 0;
2120 }
2121
2122 void tcp6_proc_exit(struct net *net)
2123 {
2124         remove_proc_entry("tcp6", net->proc_net);
2125 }
2126 #endif
2127
2128 struct proto tcpv6_prot = {
2129         .name                   = "TCPv6",
2130         .owner                  = THIS_MODULE,
2131         .close                  = tcp_close,
2132         .pre_connect            = tcp_v6_pre_connect,
2133         .connect                = tcp_v6_connect,
2134         .disconnect             = tcp_disconnect,
2135         .accept                 = inet_csk_accept,
2136         .ioctl                  = tcp_ioctl,
2137         .init                   = tcp_v6_init_sock,
2138         .destroy                = tcp_v6_destroy_sock,
2139         .shutdown               = tcp_shutdown,
2140         .setsockopt             = tcp_setsockopt,
2141         .getsockopt             = tcp_getsockopt,
2142         .keepalive              = tcp_set_keepalive,
2143         .recvmsg                = tcp_recvmsg,
2144         .sendmsg                = tcp_sendmsg,
2145         .sendpage               = tcp_sendpage,
2146         .backlog_rcv            = tcp_v6_do_rcv,
2147         .release_cb             = tcp_release_cb,
2148         .hash                   = inet6_hash,
2149         .unhash                 = inet_unhash,
2150         .get_port               = inet_csk_get_port,
2151         .enter_memory_pressure  = tcp_enter_memory_pressure,
2152         .leave_memory_pressure  = tcp_leave_memory_pressure,
2153         .stream_memory_free     = tcp_stream_memory_free,
2154         .sockets_allocated      = &tcp_sockets_allocated,
2155         .memory_allocated       = &tcp_memory_allocated,
2156         .memory_pressure        = &tcp_memory_pressure,
2157         .orphan_count           = &tcp_orphan_count,
2158         .sysctl_mem             = sysctl_tcp_mem,
2159         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2160         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2161         .max_header             = MAX_TCP_HEADER,
2162         .obj_size               = sizeof(struct tcp6_sock),
2163         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2164         .twsk_prot              = &tcp6_timewait_sock_ops,
2165         .rsk_prot               = &tcp6_request_sock_ops,
2166         .h.hashinfo             = &tcp_hashinfo,
2167         .no_autobind            = true,
2168         .diag_destroy           = tcp_abort,
2169 };
2170 EXPORT_SYMBOL_GPL(tcpv6_prot);
2171
2172 /* Thinking of making this const? Don't:
2173  * early_demux can change based on a sysctl.
2174  */
2175 static struct inet6_protocol tcpv6_protocol = {
2176         .early_demux    =       tcp_v6_early_demux,
2177         .early_demux_handler =  tcp_v6_early_demux,
2178         .handler        =       tcp_v6_rcv,
2179         .err_handler    =       tcp_v6_err,
2180         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2181 };
2182
2183 static struct inet_protosw tcpv6_protosw = {
2184         .type           =       SOCK_STREAM,
2185         .protocol       =       IPPROTO_TCP,
2186         .prot           =       &tcpv6_prot,
2187         .ops            =       &inet6_stream_ops,
2188         .flags          =       INET_PROTOSW_PERMANENT |
2189                                 INET_PROTOSW_ICSK,
2190 };
2191
2192 static int __net_init tcpv6_net_init(struct net *net)
2193 {
2194         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2195                                     SOCK_RAW, IPPROTO_TCP, net);
2196 }
2197
2198 static void __net_exit tcpv6_net_exit(struct net *net)
2199 {
2200         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2201 }
2202
2203 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2204 {
2205         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2206 }
2207
2208 static struct pernet_operations tcpv6_net_ops = {
2209         .init       = tcpv6_net_init,
2210         .exit       = tcpv6_net_exit,
2211         .exit_batch = tcpv6_net_exit_batch,
2212 };
2213
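     /* Module init: register the TCP handler with the IPv6 stack, then the
      * stream protosw, per-netns control sockets and MPTCP support; on any
      * failure, unwind the earlier registrations in reverse order.
      */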
2214 int __init tcpv6_init(void)
2215 {
2216         int ret;
2217
2218         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2219         if (ret)
2220                 goto out;
2221
2222         /* register the TCPv6 socket interface (protosw) */
2223         ret = inet6_register_protosw(&tcpv6_protosw);
2224         if (ret)
2225                 goto out_tcpv6_protocol;
2226
2227         ret = register_pernet_subsys(&tcpv6_net_ops);
2228         if (ret)
2229                 goto out_tcpv6_protosw;
2230
2231         ret = mptcpv6_init();
2232         if (ret)
2233                 goto out_tcpv6_pernet_subsys;
2234
2235 out:
2236         return ret;
2237
2238 out_tcpv6_pernet_subsys:
2239         unregister_pernet_subsys(&tcpv6_net_ops);
2240 out_tcpv6_protosw:
2241         inet6_unregister_protosw(&tcpv6_protosw);
2242 out_tcpv6_protocol:
2243         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2244         goto out;
2245 }
2246
2247 void tcpv6_exit(void)
2248 {
2249         unregister_pernet_subsys(&tcpv6_net_ops);
2250         inet6_unregister_protosw(&tcpv6_protosw);
2251         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2252 }