GNU Linux-libre 5.10.217-gnu1: net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      to a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 part (struct ipv6_pinfo) of a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112                 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * outside the bound specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
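/* Connect an IPv6 TCP socket: validate the destination, fall back to
 * tcp_v4_connect() for v4-mapped addresses, pick a source address via a
 * flow/route lookup, then start the handshake with tcp_connect().
 */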
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152         struct tcp_sock *tp = tcp_sk(sk);
153         struct in6_addr *saddr = NULL, *final_p, final;
154         struct ipv6_txoptions *opt;
155         struct flowi6 fl6;
156         struct dst_entry *dst;
157         int addr_type;
158         int err;
159         struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If interface is set while binding, indices
202                          * must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connecting to a link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (__ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
273         fl6.flowi6_oif = sk->sk_bound_dev_if;
274         fl6.flowi6_mark = sk->sk_mark;
275         fl6.fl6_dport = usin->sin6_port;
276         fl6.fl6_sport = inet->inet_sport;
277         fl6.flowi6_uid = sk->sk_uid;
278
279         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
280         final_p = fl6_update_dst(&fl6, opt, &final);
281
282         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283
284         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
285         if (IS_ERR(dst)) {
286                 err = PTR_ERR(dst);
287                 goto failure;
288         }
289
290         if (!saddr) {
291                 saddr = &fl6.saddr;
292                 sk->sk_v6_rcv_saddr = *saddr;
293         }
294
295         /* set the source address */
296         np->saddr = *saddr;
297         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298
299         sk->sk_gso_type = SKB_GSO_TCPV6;
300         ip6_dst_store(sk, dst, NULL, NULL);
301
302         icsk->icsk_ext_hdr_len = 0;
303         if (opt)
304                 icsk->icsk_ext_hdr_len = opt->opt_flen +
305                                          opt->opt_nflen;
306
307         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308
309         inet->inet_dport = usin->sin6_port;
310
311         tcp_set_state(sk, TCP_SYN_SENT);
312         err = inet6_hash_connect(tcp_death_row, sk);
313         if (err)
314                 goto late_failure;
315
316         sk_set_txhash(sk);
317
318         if (likely(!tp->repair)) {
319                 if (!tp->write_seq)
320                         WRITE_ONCE(tp->write_seq,
321                                    secure_tcpv6_seq(np->saddr.s6_addr32,
322                                                     sk->sk_v6_daddr.s6_addr32,
323                                                     inet->inet_sport,
324                                                     inet->inet_dport));
325                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326                                                    np->saddr.s6_addr32,
327                                                    sk->sk_v6_daddr.s6_addr32);
328         }
329
330         if (tcp_fastopen_defer_connect(sk, &err))
331                 return err;
332         if (err)
333                 goto late_failure;
334
335         err = tcp_connect(sk);
336         if (err)
337                 goto late_failure;
338
339         return 0;
340
341 late_failure:
342         tcp_set_state(sk, TCP_CLOSE);
343         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
344                 inet_reset_saddr(sk);
345 failure:
346         inet->inet_dport = 0;
347         sk->sk_route_caps = 0;
348         return err;
349 }
350
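/* Apply a deferred PMTU reduction: if the new path MTU shrinks our cached
 * pmtu cookie, re-sync the MSS and retransmit what is outstanding.
 */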
351 static void tcp_v6_mtu_reduced(struct sock *sk)
352 {
353         struct dst_entry *dst;
354         u32 mtu;
355
356         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
357                 return;
358
359         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
360
361         /* Drop requests trying to increase our current mss.
362          * The check done in __ip6_rt_update_pmtu() is too late.
363          */
364         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
365                 return;
366
367         dst = inet6_csk_update_pmtu(sk, mtu);
368         if (!dst)
369                 return;
370
371         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
372                 tcp_sync_mss(sk, dst_mtu(dst));
373                 tcp_simple_retransmit(sk);
374         }
375 }
376
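/* ICMPv6 error handler for TCP: look up the socket the error refers to and
 * react to redirects, packet-too-big notifications and hard errors, deferring
 * the work when the socket is currently owned by the user.
 */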
377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
378                 u8 type, u8 code, int offset, __be32 info)
379 {
380         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
381         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
382         struct net *net = dev_net(skb->dev);
383         struct request_sock *fastopen;
384         struct ipv6_pinfo *np;
385         struct tcp_sock *tp;
386         __u32 seq, snd_una;
387         struct sock *sk;
388         bool fatal;
389         int err;
390
391         sk = __inet6_lookup_established(net, &tcp_hashinfo,
392                                         &hdr->daddr, th->dest,
393                                         &hdr->saddr, ntohs(th->source),
394                                         skb->dev->ifindex, inet6_sdif(skb));
395
396         if (!sk) {
397                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
398                                   ICMP6_MIB_INERRORS);
399                 return -ENOENT;
400         }
401
402         if (sk->sk_state == TCP_TIME_WAIT) {
403                 inet_twsk_put(inet_twsk(sk));
404                 return 0;
405         }
406         seq = ntohl(th->seq);
407         fatal = icmpv6_err_convert(type, code, &err);
408         if (sk->sk_state == TCP_NEW_SYN_RECV) {
409                 tcp_req_err(sk, seq, fatal);
410                 return 0;
411         }
412
413         bh_lock_sock(sk);
414         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
415                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
416
417         if (sk->sk_state == TCP_CLOSE)
418                 goto out;
419
420         if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
421                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422                 goto out;
423         }
424
425         tp = tcp_sk(sk);
426         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
427         fastopen = rcu_dereference(tp->fastopen_rsk);
428         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
429         if (sk->sk_state != TCP_LISTEN &&
430             !between(seq, snd_una, tp->snd_nxt)) {
431                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432                 goto out;
433         }
434
435         np = tcp_inet6_sk(sk);
436
437         if (type == NDISC_REDIRECT) {
438                 if (!sock_owned_by_user(sk)) {
439                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
440
441                         if (dst)
442                                 dst->ops->redirect(dst, sk, skb);
443                 }
444                 goto out;
445         }
446
447         if (type == ICMPV6_PKT_TOOBIG) {
448                 u32 mtu = ntohl(info);
449
450                 /* We are not interested in TCP_LISTEN and open_requests
451                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
452                  * they should go through unfragmented).
453                  */
454                 if (sk->sk_state == TCP_LISTEN)
455                         goto out;
456
457                 if (!ip6_sk_accept_pmtu(sk))
458                         goto out;
459
460                 if (mtu < IPV6_MIN_MTU)
461                         goto out;
462
463                 WRITE_ONCE(tp->mtu_info, mtu);
464
465                 if (!sock_owned_by_user(sk))
466                         tcp_v6_mtu_reduced(sk);
467                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
468                                            &sk->sk_tsq_flags))
469                         sock_hold(sk);
470                 goto out;
471         }
472
473
474         /* Might be for a request_sock */
475         switch (sk->sk_state) {
476         case TCP_SYN_SENT:
477         case TCP_SYN_RECV:
478                 /* Only in fast or simultaneous open. If a fast open socket is
479                  * already accepted it is treated as a connected one below.
480                  */
481                 if (fastopen && !fastopen->sk)
482                         break;
483
484                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
485
486                 if (!sock_owned_by_user(sk)) {
487                         sk->sk_err = err;
488                         sk->sk_error_report(sk);                /* Wake people up to see the error (see connect in sock.c) */
489
490                         tcp_done(sk);
491                 } else
492                         sk->sk_err_soft = err;
493                 goto out;
494         case TCP_LISTEN:
495                 break;
496         default:
497                 /* Check if this ICMP message allows reverting the RTO backoff
498                  * (see RFC 6069).
499                  */
500                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
501                     code == ICMPV6_NOROUTE)
502                         tcp_ld_RTO_revert(sk, seq);
503         }
504
505         if (!sock_owned_by_user(sk) && np->recverr) {
506                 sk->sk_err = err;
507                 sk->sk_error_report(sk);
508         } else
509                 sk->sk_err_soft = err;
510
511 out:
512         bh_unlock_sock(sk);
513         sock_put(sk);
514         return 0;
515 }
516
517
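/* Build and transmit a SYN-ACK for @req, performing the route lookup first
 * if the caller did not supply a destination entry.
 */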
518 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
519                               struct flowi *fl,
520                               struct request_sock *req,
521                               struct tcp_fastopen_cookie *foc,
522                               enum tcp_synack_type synack_type,
523                               struct sk_buff *syn_skb)
524 {
525         struct inet_request_sock *ireq = inet_rsk(req);
526         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
527         struct ipv6_txoptions *opt;
528         struct flowi6 *fl6 = &fl->u.ip6;
529         struct sk_buff *skb;
530         int err = -ENOMEM;
531         u8 tclass;
532
533         /* First, grab a route. */
534         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
535                                                IPPROTO_TCP)) == NULL)
536                 goto done;
537
538         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
539
540         if (skb) {
541                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
542                                     &ireq->ir_v6_rmt_addr);
543
544                 fl6->daddr = ireq->ir_v6_rmt_addr;
545                 if (np->repflow && ireq->pktopts)
546                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
547
548                 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
549                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
550                                 (np->tclass & INET_ECN_MASK) :
551                                 np->tclass;
552
553                 if (!INET_ECN_is_capable(tclass) &&
554                     tcp_bpf_ca_needs_ecn((struct sock *)req))
555                         tclass |= INET_ECN_ECT_0;
556
557                 rcu_read_lock();
558                 opt = ireq->ipv6_opt;
559                 if (!opt)
560                         opt = rcu_dereference(np->opt);
561                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
562                                tclass, sk->sk_priority);
563                 rcu_read_unlock();
564                 err = net_xmit_eval(err);
565         }
566
567 done:
568         return err;
569 }
570
571
572 static void tcp_v6_reqsk_destructor(struct request_sock *req)
573 {
574         kfree(inet_rsk(req)->ipv6_opt);
575         kfree_skb(inet_rsk(req)->pktopts);
576 }
577
578 #ifdef CONFIG_TCP_MD5SIG
579 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
580                                                    const struct in6_addr *addr,
581                                                    int l3index)
582 {
583         return tcp_md5_do_lookup(sk, l3index,
584                                  (union tcp_md5_addr *)addr, AF_INET6);
585 }
586
587 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
588                                                 const struct sock *addr_sk)
589 {
590         int l3index;
591
592         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
593                                                  addr_sk->sk_bound_dev_if);
594         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
595                                     l3index);
596 }
597
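/* TCP_MD5SIG / TCP_MD5SIG_EXT setsockopt handler: validate the user's key
 * description and add or delete the per-peer key, using the IPv4 key table
 * for v4-mapped peers.
 */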
598 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
599                                  sockptr_t optval, int optlen)
600 {
601         struct tcp_md5sig cmd;
602         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603         int l3index = 0;
604         u8 prefixlen;
605
606         if (optlen < sizeof(cmd))
607                 return -EINVAL;
608
609         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
610                 return -EFAULT;
611
612         if (sin6->sin6_family != AF_INET6)
613                 return -EINVAL;
614
615         if (optname == TCP_MD5SIG_EXT &&
616             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
617                 prefixlen = cmd.tcpm_prefixlen;
618                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
619                                         prefixlen > 32))
620                         return -EINVAL;
621         } else {
622                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
623         }
624
625         if (optname == TCP_MD5SIG_EXT &&
626             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
627                 struct net_device *dev;
628
629                 rcu_read_lock();
630                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
631                 if (dev && netif_is_l3_master(dev))
632                         l3index = dev->ifindex;
633                 rcu_read_unlock();
634
635                 /* ok to reference set/not set outside of rcu;
636                  * right now device MUST be an L3 master
637                  */
638                 if (!dev || !l3index)
639                         return -EINVAL;
640         }
641
642         if (!cmd.tcpm_keylen) {
643                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
644                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
645                                               AF_INET, prefixlen,
646                                               l3index);
647                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
648                                       AF_INET6, prefixlen, l3index);
649         }
650
651         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
652                 return -EINVAL;
653
654         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
655                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
656                                       AF_INET, prefixlen, l3index,
657                                       cmd.tcpm_key, cmd.tcpm_keylen,
658                                       GFP_KERNEL);
659
660         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
661                               AF_INET6, prefixlen, l3index,
662                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
663 }
664
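/* A minimal userspace sketch (not part of this file) of the setsockopt
 * interface parsed above, assuming "fd" is an IPv6 TCP socket and "peer"
 * holds the remote in6_addr; the key material is illustrative only:
 *
 *	struct tcp_md5sig md5 = { };
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	a->sin6_family = AF_INET6;
 *	a->sin6_addr = peer;
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)))
 *		perror("TCP_MD5SIG");
 */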
665 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
666                                    const struct in6_addr *daddr,
667                                    const struct in6_addr *saddr,
668                                    const struct tcphdr *th, int nbytes)
669 {
670         struct tcp6_pseudohdr *bp;
671         struct scatterlist sg;
672         struct tcphdr *_th;
673
674         bp = hp->scratch;
675         /* 1. TCP pseudo-header (RFC2460) */
676         bp->saddr = *saddr;
677         bp->daddr = *daddr;
678         bp->protocol = cpu_to_be32(IPPROTO_TCP);
679         bp->len = cpu_to_be32(nbytes);
680
681         _th = (struct tcphdr *)(bp + 1);
682         memcpy(_th, th, sizeof(*th));
683         _th->check = 0;
684
685         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
686         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
687                                 sizeof(*bp) + sizeof(*th));
688         return crypto_ahash_update(hp->md5_req);
689 }
690
691 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
692                                const struct in6_addr *daddr, struct in6_addr *saddr,
693                                const struct tcphdr *th)
694 {
695         struct tcp_md5sig_pool *hp;
696         struct ahash_request *req;
697
698         hp = tcp_get_md5sig_pool();
699         if (!hp)
700                 goto clear_hash_noput;
701         req = hp->md5_req;
702
703         if (crypto_ahash_init(req))
704                 goto clear_hash;
705         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
706                 goto clear_hash;
707         if (tcp_md5_hash_key(hp, key))
708                 goto clear_hash;
709         ahash_request_set_crypt(req, NULL, md5_hash, 0);
710         if (crypto_ahash_final(req))
711                 goto clear_hash;
712
713         tcp_put_md5sig_pool();
714         return 0;
715
716 clear_hash:
717         tcp_put_md5sig_pool();
718 clear_hash_noput:
719         memset(md5_hash, 0, 16);
720         return 1;
721 }
722
723 static int tcp_v6_md5_hash_skb(char *md5_hash,
724                                const struct tcp_md5sig_key *key,
725                                const struct sock *sk,
726                                const struct sk_buff *skb)
727 {
728         const struct in6_addr *saddr, *daddr;
729         struct tcp_md5sig_pool *hp;
730         struct ahash_request *req;
731         const struct tcphdr *th = tcp_hdr(skb);
732
733         if (sk) { /* valid for establish/request sockets */
734                 saddr = &sk->sk_v6_rcv_saddr;
735                 daddr = &sk->sk_v6_daddr;
736         } else {
737                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
738                 saddr = &ip6h->saddr;
739                 daddr = &ip6h->daddr;
740         }
741
742         hp = tcp_get_md5sig_pool();
743         if (!hp)
744                 goto clear_hash_noput;
745         req = hp->md5_req;
746
747         if (crypto_ahash_init(req))
748                 goto clear_hash;
749
750         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
751                 goto clear_hash;
752         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
753                 goto clear_hash;
754         if (tcp_md5_hash_key(hp, key))
755                 goto clear_hash;
756         ahash_request_set_crypt(req, NULL, md5_hash, 0);
757         if (crypto_ahash_final(req))
758                 goto clear_hash;
759
760         tcp_put_md5sig_pool();
761         return 0;
762
763 clear_hash:
764         tcp_put_md5sig_pool();
765 clear_hash_noput:
766         memset(md5_hash, 0, 16);
767         return 1;
768 }
769
770 #endif
771
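/* With CONFIG_TCP_MD5SIG, verify the MD5 signature of an incoming segment
 * against the key expected for its source address; returns true if the
 * segment must be dropped.
 */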
772 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
773                                     const struct sk_buff *skb,
774                                     int dif, int sdif)
775 {
776 #ifdef CONFIG_TCP_MD5SIG
777         const __u8 *hash_location = NULL;
778         struct tcp_md5sig_key *hash_expected;
779         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
780         const struct tcphdr *th = tcp_hdr(skb);
781         int genhash, l3index;
782         u8 newhash[16];
783
784         /* If sdif is set, the packet ingressed via a device
785          * in an L3 domain and dif is set to the l3mdev.
786          */
787         l3index = sdif ? dif : 0;
788
789         hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
790         hash_location = tcp_parse_md5sig_option(th);
791
792         /* We've parsed the options - do we have a hash? */
793         if (!hash_expected && !hash_location)
794                 return false;
795
796         if (hash_expected && !hash_location) {
797                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798                 return true;
799         }
800
801         if (!hash_expected && hash_location) {
802                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803                 return true;
804         }
805
806         /* check the signature */
807         genhash = tcp_v6_md5_hash_skb(newhash,
808                                       hash_expected,
809                                       NULL, skb);
810
811         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
812                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
813                 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
814                                      genhash ? "failed" : "mismatch",
815                                      &ip6h->saddr, ntohs(th->source),
816                                      &ip6h->daddr, ntohs(th->dest), l3index);
817                 return true;
818         }
819 #endif
820         return false;
821 }
822
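/* Fill the IPv6-specific fields of a freshly allocated request sock from the
 * incoming SYN, latching the SYN's packet options when the listener asked
 * for any of them.
 */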
823 static void tcp_v6_init_req(struct request_sock *req,
824                             const struct sock *sk_listener,
825                             struct sk_buff *skb)
826 {
827         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
828         struct inet_request_sock *ireq = inet_rsk(req);
829         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
830
831         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
832         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
833
834         /* So that link locals have meaning */
835         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
836             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
837                 ireq->ir_iif = tcp_v6_iif(skb);
838
839         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
840             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
841              np->rxopt.bits.rxinfo ||
842              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
843              np->rxopt.bits.rxohlim || np->repflow)) {
844                 refcount_inc(&skb->users);
845                 ireq->pktopts = skb;
846         }
847 }
848
849 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
850                                           struct flowi *fl,
851                                           const struct request_sock *req)
852 {
853         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
854 }
855
856 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
857         .family         =       AF_INET6,
858         .obj_size       =       sizeof(struct tcp6_request_sock),
859         .rtx_syn_ack    =       tcp_rtx_synack,
860         .send_ack       =       tcp_v6_reqsk_send_ack,
861         .destructor     =       tcp_v6_reqsk_destructor,
862         .send_reset     =       tcp_v6_send_reset,
863         .syn_ack_timeout =      tcp_syn_ack_timeout,
864 };
865
866 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
867         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
868                                 sizeof(struct ipv6hdr),
869 #ifdef CONFIG_TCP_MD5SIG
870         .req_md5_lookup =       tcp_v6_md5_lookup,
871         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
872 #endif
873         .init_req       =       tcp_v6_init_req,
874 #ifdef CONFIG_SYN_COOKIES
875         .cookie_init_seq =      cookie_v6_init_sequence,
876 #endif
877         .route_req      =       tcp_v6_route_req,
878         .init_seq       =       tcp_v6_init_seq,
879         .init_ts_off    =       tcp_v6_init_ts_off,
880         .send_synack    =       tcp_v6_send_synack,
881 };
882
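/* Build and send a bare ACK or RST in reply to @skb on behalf of @sk (which
 * may be a timewait socket or NULL), echoing the addresses of the received
 * segment and transmitting via the per-netns control socket.
 */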
883 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
884                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
885                                  int oif, struct tcp_md5sig_key *key, int rst,
886                                  u8 tclass, __be32 label, u32 priority)
887 {
888         const struct tcphdr *th = tcp_hdr(skb);
889         struct tcphdr *t1;
890         struct sk_buff *buff;
891         struct flowi6 fl6;
892         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
893         struct sock *ctl_sk = net->ipv6.tcp_sk;
894         unsigned int tot_len = sizeof(struct tcphdr);
895         struct dst_entry *dst;
896         __be32 *topt;
897         __u32 mark = 0;
898
899         if (tsecr)
900                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
901 #ifdef CONFIG_TCP_MD5SIG
902         if (key)
903                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
904 #endif
905
906         buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
907                          GFP_ATOMIC);
908         if (!buff)
909                 return;
910
911         skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
912
913         t1 = skb_push(buff, tot_len);
914         skb_reset_transport_header(buff);
915
916         /* Swap the send and the receive. */
917         memset(t1, 0, sizeof(*t1));
918         t1->dest = th->source;
919         t1->source = th->dest;
920         t1->doff = tot_len / 4;
921         t1->seq = htonl(seq);
922         t1->ack_seq = htonl(ack);
923         t1->ack = !rst || !th->ack;
924         t1->rst = rst;
925         t1->window = htons(win);
926
927         topt = (__be32 *)(t1 + 1);
928
929         if (tsecr) {
930                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
931                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
932                 *topt++ = htonl(tsval);
933                 *topt++ = htonl(tsecr);
934         }
935
936 #ifdef CONFIG_TCP_MD5SIG
937         if (key) {
938                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
939                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
940                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
941                                     &ipv6_hdr(skb)->saddr,
942                                     &ipv6_hdr(skb)->daddr, t1);
943         }
944 #endif
945
946         memset(&fl6, 0, sizeof(fl6));
947         fl6.daddr = ipv6_hdr(skb)->saddr;
948         fl6.saddr = ipv6_hdr(skb)->daddr;
949         fl6.flowlabel = label;
950
951         buff->ip_summed = CHECKSUM_PARTIAL;
952         buff->csum = 0;
953
954         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
955
956         fl6.flowi6_proto = IPPROTO_TCP;
957         if (rt6_need_strict(&fl6.daddr) && !oif)
958                 fl6.flowi6_oif = tcp_v6_iif(skb);
959         else {
960                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
961                         oif = skb->skb_iif;
962
963                 fl6.flowi6_oif = oif;
964         }
965
966         if (sk) {
967                 if (sk->sk_state == TCP_TIME_WAIT) {
968                         mark = inet_twsk(sk)->tw_mark;
969                         /* autoflowlabel relies on buff->hash */
970                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
971                                      PKT_HASH_TYPE_L4);
972                 } else {
973                         mark = sk->sk_mark;
974                 }
975                 buff->tstamp = tcp_transmit_time(sk);
976         }
977         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
978         fl6.fl6_dport = t1->dest;
979         fl6.fl6_sport = t1->source;
980         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
981         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
982
983         /* Pass a socket to ip6_dst_lookup_flow whether or not it is for a RST.
984          * The underlying function will use it to retrieve the network
985          * namespace.
986          */
987         if (sk && sk->sk_state != TCP_TIME_WAIT)
988                 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
989         else
990                 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
991         if (!IS_ERR(dst)) {
992                 skb_dst_set(buff, dst);
993                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
994                          tclass & ~INET_ECN_MASK, priority);
995                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
996                 if (rst)
997                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
998                 return;
999         }
1000
1001         kfree_skb(buff);
1002 }
1003
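/* Send a RST in reply to @skb. Sequence numbers are derived from the
 * offending segment; when TCP-MD5 is in use the key is looked up (via a
 * listener lookup if no socket was found) so the reset can be signed.
 */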
1004 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1005 {
1006         const struct tcphdr *th = tcp_hdr(skb);
1007         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1008         u32 seq = 0, ack_seq = 0;
1009         struct tcp_md5sig_key *key = NULL;
1010 #ifdef CONFIG_TCP_MD5SIG
1011         const __u8 *hash_location = NULL;
1012         unsigned char newhash[16];
1013         int genhash;
1014         struct sock *sk1 = NULL;
1015 #endif
1016         __be32 label = 0;
1017         u32 priority = 0;
1018         struct net *net;
1019         int oif = 0;
1020
1021         if (th->rst)
1022                 return;
1023
1024         /* If sk is not NULL, it means we did a successful lookup and the
1025          * incoming route had to be correct. prequeue might have dropped our dst.
1026          */
1027         if (!sk && !ipv6_unicast_destination(skb))
1028                 return;
1029
1030         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1031 #ifdef CONFIG_TCP_MD5SIG
1032         rcu_read_lock();
1033         hash_location = tcp_parse_md5sig_option(th);
1034         if (sk && sk_fullsock(sk)) {
1035                 int l3index;
1036
1037                 /* If sdif is set, the packet ingressed via a device
1038                  * in an L3 domain and inet_iif is set to it.
1039                  */
1040                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1041                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1042         } else if (hash_location) {
1043                 int dif = tcp_v6_iif_l3_slave(skb);
1044                 int sdif = tcp_v6_sdif(skb);
1045                 int l3index;
1046
1047                 /*
1048                  * The active side is lost. Try to find the listening socket
1049                  * through the source port, and then find the md5 key through
1050                  * the listening socket. We do not lose security here:
1051                  * the incoming packet is checked against the md5 hash of the
1052                  * key we find, and no RST is generated if the hash doesn't match.
1053                  */
1054                 sk1 = inet6_lookup_listener(net,
1055                                            &tcp_hashinfo, NULL, 0,
1056                                            &ipv6h->saddr,
1057                                            th->source, &ipv6h->daddr,
1058                                            ntohs(th->source), dif, sdif);
1059                 if (!sk1)
1060                         goto out;
1061
1062                 /* If sdif is set, the packet ingressed via a device
1063                  * in an L3 domain and dif is set to it.
1064                  */
1065                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1066
1067                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1068                 if (!key)
1069                         goto out;
1070
1071                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1072                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1073                         goto out;
1074         }
1075 #endif
1076
1077         if (th->ack)
1078                 seq = ntohl(th->ack_seq);
1079         else
1080                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1081                           (th->doff << 2);
1082
1083         if (sk) {
1084                 oif = sk->sk_bound_dev_if;
1085                 if (sk_fullsock(sk)) {
1086                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1087
1088                         trace_tcp_send_reset(sk, skb);
1089                         if (np->repflow)
1090                                 label = ip6_flowlabel(ipv6h);
1091                         priority = sk->sk_priority;
1092                 }
1093                 if (sk->sk_state == TCP_TIME_WAIT) {
1094                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1095                         priority = inet_twsk(sk)->tw_priority;
1096                 }
1097         } else {
1098                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1099                         label = ip6_flowlabel(ipv6h);
1100         }
1101
1102         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1103                              ipv6_get_dsfield(ipv6h), label, priority);
1104
1105 #ifdef CONFIG_TCP_MD5SIG
1106 out:
1107         rcu_read_unlock();
1108 #endif
1109 }
1110
1111 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1112                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1113                             struct tcp_md5sig_key *key, u8 tclass,
1114                             __be32 label, u32 priority)
1115 {
1116         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1117                              tclass, label, priority);
1118 }
1119
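/* Answer a segment that hit a TIME_WAIT socket with the ACK it expects,
 * then drop the timewait reference.
 */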
1120 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1121 {
1122         struct inet_timewait_sock *tw = inet_twsk(sk);
1123         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1124
1125         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1126                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1127                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1128                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1129                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1130
1131         inet_twsk_put(tw);
1132 }
1133
1134 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1135                                   struct request_sock *req)
1136 {
1137         int l3index;
1138
1139         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1140
1141         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1142          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1143          */
1144         /* RFC 7323 2.3
1145          * The window field (SEG.WND) of every outgoing segment, with the
1146          * exception of <SYN> segments, MUST be right-shifted by
1147          * Rcv.Wind.Shift bits:
1148          */
1149         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1150                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1151                         tcp_rsk(req)->rcv_nxt,
1152                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1153                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1154                         READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1155                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1156                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1157 }
1158
1159
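/* With CONFIG_SYN_COOKIES, a non-SYN segment arriving on a listener may be
 * a syncookie ACK; validate it and recover the connection it encodes.
 */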
1160 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1161 {
1162 #ifdef CONFIG_SYN_COOKIES
1163         const struct tcphdr *th = tcp_hdr(skb);
1164
1165         if (!th->syn)
1166                 sk = cookie_v6_check(sk, skb);
1167 #endif
1168         return sk;
1169 }
1170
1171 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1172                          struct tcphdr *th, u32 *cookie)
1173 {
1174         u16 mss = 0;
1175 #ifdef CONFIG_SYN_COOKIES
1176         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1177                                     &tcp_request_sock_ipv6_ops, sk, th);
1178         if (mss) {
1179                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1180                 tcp_synq_overflow(sk);
1181         }
1182 #endif
1183         return mss;
1184 }
1185
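/* Handle an incoming SYN on a listener: IPv4 packets are handed to
 * tcp_v4_conn_request(), non-unicast destinations and v4-mapped sources are
 * dropped, and everything else goes through the generic tcp_conn_request().
 */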
1186 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1187 {
1188         if (skb->protocol == htons(ETH_P_IP))
1189                 return tcp_v4_conn_request(sk, skb);
1190
1191         if (!ipv6_unicast_destination(skb))
1192                 goto drop;
1193
1194         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1195                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1196                 return 0;
1197         }
1198
1199         return tcp_conn_request(&tcp6_request_sock_ops,
1200                                 &tcp_request_sock_ipv6_ops, sk, skb);
1201
1202 drop:
1203         tcp_listendrop(sk);
1204         return 0; /* don't send reset */
1205 }
1206
1207 static void tcp_v6_restore_cb(struct sk_buff *skb)
1208 {
1209         /* We need to move header back to the beginning if xfrm6_policy_check()
1210          * and tcp_v6_fill_cb() are going to be called again.
1211          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1212          */
1213         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1214                 sizeof(struct inet6_skb_parm));
1215 }
1216
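/* Create the child socket once the handshake completes: the v4-mapped case
 * is delegated to tcp_v4_syn_recv_sock(), otherwise addresses, IPv6 options
 * and any MD5 key are copied from the listener and the new socket is hashed.
 */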
1217 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1218                                          struct request_sock *req,
1219                                          struct dst_entry *dst,
1220                                          struct request_sock *req_unhash,
1221                                          bool *own_req)
1222 {
1223         struct inet_request_sock *ireq;
1224         struct ipv6_pinfo *newnp;
1225         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1226         struct ipv6_txoptions *opt;
1227         struct inet_sock *newinet;
1228         bool found_dup_sk = false;
1229         struct tcp_sock *newtp;
1230         struct sock *newsk;
1231 #ifdef CONFIG_TCP_MD5SIG
1232         struct tcp_md5sig_key *key;
1233         int l3index;
1234 #endif
1235         struct flowi6 fl6;
1236
1237         if (skb->protocol == htons(ETH_P_IP)) {
1238                 /*
1239                  *      v6 mapped
1240                  */
1241
1242                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1243                                              req_unhash, own_req);
1244
1245                 if (!newsk)
1246                         return NULL;
1247
1248                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1249
1250                 newinet = inet_sk(newsk);
1251                 newnp = tcp_inet6_sk(newsk);
1252                 newtp = tcp_sk(newsk);
1253
1254                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1255
1256                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1257
1258                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1259                 if (sk_is_mptcp(newsk))
1260                         mptcpv6_handle_mapped(newsk, true);
1261                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1262 #ifdef CONFIG_TCP_MD5SIG
1263                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1264 #endif
1265
1266                 newnp->ipv6_mc_list = NULL;
1267                 newnp->ipv6_ac_list = NULL;
1268                 newnp->ipv6_fl_list = NULL;
1269                 newnp->pktoptions  = NULL;
1270                 newnp->opt         = NULL;
1271                 newnp->mcast_oif   = inet_iif(skb);
1272                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1273                 newnp->rcv_flowinfo = 0;
1274                 if (np->repflow)
1275                         newnp->flow_label = 0;
1276
1277                 /*
1278                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1279                  * here, tcp_create_openreq_child now does this for us, see the comment in
1280                  * that function for the gory details. -acme
1281                  */
1282
1283                 /* This is a tricky place. Until this moment the IPv4 tcp
1284                    socket worked with the IPv6 icsk.icsk_af_ops.
1285                    Sync it now.
1286                  */
1287                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1288
1289                 return newsk;
1290         }
1291
1292         ireq = inet_rsk(req);
1293
1294         if (sk_acceptq_is_full(sk))
1295                 goto out_overflow;
1296
1297         if (!dst) {
1298                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1299                 if (!dst)
1300                         goto out;
1301         }
1302
1303         newsk = tcp_create_openreq_child(sk, req, skb);
1304         if (!newsk)
1305                 goto out_nonewsk;
1306
1307         /*
1308          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1309          * count here, tcp_create_openreq_child now does this for us, see the
1310          * comment in that function for the gory details. -acme
1311          */
1312
1313         newsk->sk_gso_type = SKB_GSO_TCPV6;
1314         ip6_dst_store(newsk, dst, NULL, NULL);
1315         inet6_sk_rx_dst_set(newsk, skb);
1316
1317         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1318
1319         newtp = tcp_sk(newsk);
1320         newinet = inet_sk(newsk);
1321         newnp = tcp_inet6_sk(newsk);
1322
1323         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1324
1325         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1326         newnp->saddr = ireq->ir_v6_loc_addr;
1327         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1328         newsk->sk_bound_dev_if = ireq->ir_iif;
1329
1330         /* Now IPv6 options...
1331
1332            First: no IPv4 options.
1333          */
1334         newinet->inet_opt = NULL;
1335         newnp->ipv6_mc_list = NULL;
1336         newnp->ipv6_ac_list = NULL;
1337         newnp->ipv6_fl_list = NULL;
1338
1339         /* Clone RX bits */
1340         newnp->rxopt.all = np->rxopt.all;
1341
1342         newnp->pktoptions = NULL;
1343         newnp->opt        = NULL;
1344         newnp->mcast_oif  = tcp_v6_iif(skb);
1345         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1346         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1347         if (np->repflow)
1348                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1349
1350         /* Set ToS of the new socket based upon the value of incoming SYN.
1351          * ECT bits are set later in tcp_init_transfer().
1352          */
1353         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1354                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1355
1356         /* Clone native IPv6 options from the listening socket (if any).
1357
1358            Yes, keeping a reference count would be much more clever,
1359            but we do one more thing here: reattach optmem
1360            to newsk.
1361          */
1362         opt = ireq->ipv6_opt;
1363         if (!opt)
1364                 opt = rcu_dereference(np->opt);
1365         if (opt) {
1366                 opt = ipv6_dup_options(newsk, opt);
1367                 RCU_INIT_POINTER(newnp->opt, opt);
1368         }
1369         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1370         if (opt)
1371                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1372                                                     opt->opt_flen;
1373
1374         tcp_ca_openreq_child(newsk, dst);
1375
1376         tcp_sync_mss(newsk, dst_mtu(dst));
1377         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1378
1379         tcp_initialize_rcv_mss(newsk);
1380
1381         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1382         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1383
1384 #ifdef CONFIG_TCP_MD5SIG
1385         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1386
1387         /* Copy over the MD5 key from the original socket */
1388         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1389         if (key) {
1390                 /* We're using one, so create a matching key
1391                  * on the newsk structure. If we fail to get
1392                  * memory, then we end up not copying the key
1393                  * across. Shucks.
1394                  */
1395                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1396                                AF_INET6, 128, l3index, key->key, key->keylen,
1397                                sk_gfp_mask(sk, GFP_ATOMIC));
1398         }
1399 #endif
1400
1401         if (__inet_inherit_port(sk, newsk) < 0) {
1402                 inet_csk_prepare_forced_close(newsk);
1403                 tcp_done(newsk);
1404                 goto out;
1405         }
1406         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1407                                        &found_dup_sk);
1408         if (*own_req) {
1409                 tcp_move_syn(newtp, req);
1410
1411                 /* Clone pktoptions received with SYN, if we own the req */
1412                 if (ireq->pktopts) {
1413                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1414                         consume_skb(ireq->pktopts);
1415                         ireq->pktopts = NULL;
1416                         if (newnp->pktoptions)
1417                                 tcp_v6_restore_cb(newnp->pktoptions);
1418                 }
1419         } else {
1420                 if (!req_unhash && found_dup_sk) {
1421                         /* This code path should be executed in the
1422                          * syncookie case only.
1423                          */
1424                         bh_unlock_sock(newsk);
1425                         sock_put(newsk);
1426                         newsk = NULL;
1427                 }
1428         }
1429
1430         return newsk;
1431
1432 out_overflow:
1433         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1434 out_nonewsk:
1435         dst_release(dst);
1436 out:
1437         tcp_listendrop(sk);
1438         return NULL;
1439 }
1440
1441 /* The socket must have its spinlock held when we get
1442  * here, unless it is a TCP_LISTEN socket.
1443  *
1444  * We have a potential double-lock case here, so even when
1445  * doing backlog processing we use the BH locking scheme.
1446  * This is because we cannot sleep with the original spinlock
1447  * held.
1448  */
1449 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1450 {
1451         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1452         struct sk_buff *opt_skb = NULL;
1453         struct tcp_sock *tp;
1454
1455         /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1456            goes to the IPv4 receive handler and is backlogged.
1457            From the backlog it always ends up here. Kerboom...
1458            Fortunately, tcp_rcv_established and rcv_established
1459            handle it correctly, but that is not the case with
1460            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1461          */
1462
1463         if (skb->protocol == htons(ETH_P_IP))
1464                 return tcp_v4_do_rcv(sk, skb);
1465
1466         /*
1467          *      socket locking is here for SMP purposes as backlog rcv
1468          *      is currently called with bh processing disabled.
1469          */
1470
1471         /* Do Stevens' IPV6_PKTOPTIONS.
1472
1473            Yes, guys, this is the only place in our code where we
1474            can handle it without affecting IPv4.
1475            The rest of the code is protocol independent,
1476            and I do not like the idea of uglifying IPv4.
1477
1478            Actually, the whole idea behind IPV6_PKTOPTIONS
1479            does not look very well thought out. For now we latch
1480            the options received in the last packet enqueued
1481            by tcp. Feel free to propose a better solution.
1482                                                --ANK (980728)
1483          */
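        /* A minimal userspace sketch of the consumer side (illustrative only,
         * not kernel code): a receiver typically enables the relevant rxopt
         * bits with e.g.
         *
         *      int on = 1;
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *      setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
         *
         * and then retrieves the options latched from the last received
         * segment via getsockopt(IPV6_2292PKTOPTIONS) on the connected
         * TCP socket.
         */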
1484         if (np->rxopt.all)
1485                 opt_skb = skb_clone_and_charge_r(skb, sk);
1486
1487         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1488                 struct dst_entry *dst;
1489
1490                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1491                                                 lockdep_sock_is_held(sk));
1492
1493                 sock_rps_save_rxhash(sk, skb);
1494                 sk_mark_napi_id(sk, skb);
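                /* Invalidate the cached rx dst if this segment arrived on a
                 * different interface or the route no longer checks out.
                 */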
1495                 if (dst) {
1496                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1497                             dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1498                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1499                                 dst_release(dst);
1500                         }
1501                 }
1502
1503                 tcp_rcv_established(sk, skb);
1504                 if (opt_skb)
1505                         goto ipv6_pktoptions;
1506                 return 0;
1507         }
1508
1509         if (tcp_checksum_complete(skb))
1510                 goto csum_err;
1511
1512         if (sk->sk_state == TCP_LISTEN) {
1513                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1514
1515                 if (!nsk)
1516                         goto discard;
1517
1518                 if (nsk != sk) {
1519                         if (tcp_child_process(sk, nsk, skb))
1520                                 goto reset;
1521                         if (opt_skb)
1522                                 __kfree_skb(opt_skb);
1523                         return 0;
1524                 }
1525         } else
1526                 sock_rps_save_rxhash(sk, skb);
1527
1528         if (tcp_rcv_state_process(sk, skb))
1529                 goto reset;
1530         if (opt_skb)
1531                 goto ipv6_pktoptions;
1532         return 0;
1533
1534 reset:
1535         tcp_v6_send_reset(sk, skb);
1536 discard:
1537         if (opt_skb)
1538                 __kfree_skb(opt_skb);
1539         kfree_skb(skb);
1540         return 0;
1541 csum_err:
1542         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1543         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1544         goto discard;
1545
1546
1547 ipv6_pktoptions:
1548         /* What are we checking here?
1549
1550            1. skb was enqueued by tcp.
1551            2. skb was added to the tail of the read queue, not out of order.
1552            3. the socket is not in a passive state.
1553            4. and it really contains options which the user wants to receive.
1554          */
1555         tp = tcp_sk(sk);
1556         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1557             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1558                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1559                         np->mcast_oif = tcp_v6_iif(opt_skb);
1560                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1561                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1562                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1563                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1564                 if (np->repflow)
1565                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1566                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1567                         tcp_v6_restore_cb(opt_skb);
1568                         opt_skb = xchg(&np->pktoptions, opt_skb);
1569                 } else {
1570                         __kfree_skb(opt_skb);
1571                         opt_skb = xchg(&np->pktoptions, NULL);
1572                 }
1573         }
1574
1575         kfree_skb(opt_skb);
1576         return 0;
1577 }
1578
1579 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1580                            const struct tcphdr *th)
1581 {
1582         /* This is tricky: we move IP6CB to its correct location inside
1583          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1584          * _decode_session6() uses IP6CB().
1585          * barrier() makes sure the compiler won't play aliasing games.
1586          */
1587         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1588                 sizeof(struct inet6_skb_parm));
1589         barrier();
1590
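        /* end_seq covers the payload plus one sequence number each for SYN
         * and FIN, since those flags consume sequence space of their own.
         */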
1591         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1592         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1593                                     skb->len - th->doff*4);
1594         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1595         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1596         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1597         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1598         TCP_SKB_CB(skb)->sacked = 0;
1599         TCP_SKB_CB(skb)->has_rxtstamp =
1600                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1601 }
1602
1603 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1604 {
1605         struct sk_buff *skb_to_free;
1606         int sdif = inet6_sdif(skb);
1607         int dif = inet6_iif(skb);
1608         const struct tcphdr *th;
1609         const struct ipv6hdr *hdr;
1610         bool refcounted;
1611         struct sock *sk;
1612         int ret;
1613         struct net *net = dev_net(skb->dev);
1614
1615         if (skb->pkt_type != PACKET_HOST)
1616                 goto discard_it;
1617
1618         /*
1619          *      Count it even if it's bad.
1620          */
1621         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1622
1623         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1624                 goto discard_it;
1625
1626         th = (const struct tcphdr *)skb->data;
1627
1628         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1629                 goto bad_packet;
1630         if (!pskb_may_pull(skb, th->doff*4))
1631                 goto discard_it;
1632
1633         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1634                 goto csum_error;
1635
1636         th = (const struct tcphdr *)skb->data;
1637         hdr = ipv6_hdr(skb);
1638
1639 lookup:
1640         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1641                                 th->source, th->dest, inet6_iif(skb), sdif,
1642                                 &refcounted);
1643         if (!sk)
1644                 goto no_tcp_socket;
1645
1646 process:
1647         if (sk->sk_state == TCP_TIME_WAIT)
1648                 goto do_time_wait;
1649
1650         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1651                 struct request_sock *req = inet_reqsk(sk);
1652                 bool req_stolen = false;
1653                 struct sock *nsk;
1654
1655                 sk = req->rsk_listener;
1656                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1657                         sk_drops_add(sk, skb);
1658                         reqsk_put(req);
1659                         goto discard_it;
1660                 }
1661                 if (tcp_checksum_complete(skb)) {
1662                         reqsk_put(req);
1663                         goto csum_error;
1664                 }
1665                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1666                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1667                         goto lookup;
1668                 }
1669                 sock_hold(sk);
1670                 refcounted = true;
1671                 nsk = NULL;
1672                 if (!tcp_filter(sk, skb)) {
1673                         th = (const struct tcphdr *)skb->data;
1674                         hdr = ipv6_hdr(skb);
1675                         tcp_v6_fill_cb(skb, hdr, th);
1676                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1677                 }
1678                 if (!nsk) {
1679                         reqsk_put(req);
1680                         if (req_stolen) {
1681                                 /* Another CPU got exclusive access to req
1682                                  * and created a full-blown socket.
1683                                  * Try to feed this packet to this socket
1684                                  * instead of discarding it.
1685                                  */
1686                                 tcp_v6_restore_cb(skb);
1687                                 sock_put(sk);
1688                                 goto lookup;
1689                         }
1690                         goto discard_and_relse;
1691                 }
1692                 if (nsk == sk) {
1693                         reqsk_put(req);
1694                         tcp_v6_restore_cb(skb);
1695                 } else if (tcp_child_process(sk, nsk, skb)) {
1696                         tcp_v6_send_reset(nsk, skb);
1697                         goto discard_and_relse;
1698                 } else {
1699                         sock_put(sk);
1700                         return 0;
1701                 }
1702         }
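        /* Enforce IPV6_MINHOPCOUNT: a generalized-TTL-security-style check
         * that drops segments whose hop limit is below the minimum the
         * application configured on the socket.
         */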
1703         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1704                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1705                 goto discard_and_relse;
1706         }
1707
1708         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1709                 goto discard_and_relse;
1710
1711         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1712                 goto discard_and_relse;
1713
1714         if (tcp_filter(sk, skb))
1715                 goto discard_and_relse;
1716         th = (const struct tcphdr *)skb->data;
1717         hdr = ipv6_hdr(skb);
1718         tcp_v6_fill_cb(skb, hdr, th);
1719
1720         skb->dev = NULL;
1721
1722         if (sk->sk_state == TCP_LISTEN) {
1723                 ret = tcp_v6_do_rcv(sk, skb);
1724                 goto put_and_return;
1725         }
1726
1727         sk_incoming_cpu_update(sk);
1728
1729         bh_lock_sock_nested(sk);
1730         tcp_segs_in(tcp_sk(sk), skb);
1731         ret = 0;
1732         if (!sock_owned_by_user(sk)) {
1733                 skb_to_free = sk->sk_rx_skb_cache;
1734                 sk->sk_rx_skb_cache = NULL;
1735                 ret = tcp_v6_do_rcv(sk, skb);
1736         } else {
1737                 if (tcp_add_backlog(sk, skb))
1738                         goto discard_and_relse;
1739                 skb_to_free = NULL;
1740         }
1741         bh_unlock_sock(sk);
1742         if (skb_to_free)
1743                 __kfree_skb(skb_to_free);
1744 put_and_return:
1745         if (refcounted)
1746                 sock_put(sk);
1747         return ret ? -1 : 0;
1748
1749 no_tcp_socket:
1750         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1751                 goto discard_it;
1752
1753         tcp_v6_fill_cb(skb, hdr, th);
1754
1755         if (tcp_checksum_complete(skb)) {
1756 csum_error:
1757                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1758 bad_packet:
1759                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1760         } else {
1761                 tcp_v6_send_reset(NULL, skb);
1762         }
1763
1764 discard_it:
1765         kfree_skb(skb);
1766         return 0;
1767
1768 discard_and_relse:
1769         sk_drops_add(sk, skb);
1770         if (refcounted)
1771                 sock_put(sk);
1772         goto discard_it;
1773
1774 do_time_wait:
1775         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1776                 inet_twsk_put(inet_twsk(sk));
1777                 goto discard_it;
1778         }
1779
1780         tcp_v6_fill_cb(skb, hdr, th);
1781
1782         if (tcp_checksum_complete(skb)) {
1783                 inet_twsk_put(inet_twsk(sk));
1784                 goto csum_error;
1785         }
1786
1787         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1788         case TCP_TW_SYN:
1789         {
1790                 struct sock *sk2;
1791
1792                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1793                                             skb, __tcp_hdrlen(th),
1794                                             &ipv6_hdr(skb)->saddr, th->source,
1795                                             &ipv6_hdr(skb)->daddr,
1796                                             ntohs(th->dest),
1797                                             tcp_v6_iif_l3_slave(skb),
1798                                             sdif);
1799                 if (sk2) {
1800                         struct inet_timewait_sock *tw = inet_twsk(sk);
1801                         inet_twsk_deschedule_put(tw);
1802                         sk = sk2;
1803                         tcp_v6_restore_cb(skb);
1804                         refcounted = false;
1805                         goto process;
1806                 }
1807         }
1808                 /* to ACK */
1809                 fallthrough;
1810         case TCP_TW_ACK:
1811                 tcp_v6_timewait_ack(sk, skb);
1812                 break;
1813         case TCP_TW_RST:
1814                 tcp_v6_send_reset(sk, skb);
1815                 inet_twsk_deschedule_put(inet_twsk(sk));
1816                 goto discard_it;
1817         case TCP_TW_SUCCESS:
1818                 ;
1819         }
1820         goto discard_it;
1821 }
1822
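/* Early demux: look up the established socket while the packet is still at
 * the IP layer so that skb->sk and, when valid, the socket's cached receive
 * route can be attached up front, sparing tcp_v6_rcv() a second lookup.
 */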
1823 void tcp_v6_early_demux(struct sk_buff *skb)
1824 {
1825         const struct ipv6hdr *hdr;
1826         const struct tcphdr *th;
1827         struct sock *sk;
1828
1829         if (skb->pkt_type != PACKET_HOST)
1830                 return;
1831
1832         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1833                 return;
1834
1835         hdr = ipv6_hdr(skb);
1836         th = tcp_hdr(skb);
1837
1838         if (th->doff < sizeof(struct tcphdr) / 4)
1839                 return;
1840
1841         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1842         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1843                                         &hdr->saddr, th->source,
1844                                         &hdr->daddr, ntohs(th->dest),
1845                                         inet6_iif(skb), inet6_sdif(skb));
1846         if (sk) {
1847                 skb->sk = sk;
1848                 skb->destructor = sock_edemux;
1849                 if (sk_fullsock(sk)) {
1850                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1851
1852                         if (dst)
1853                                 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1854                         if (dst &&
1855                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1856                                 skb_dst_set_noref(skb, dst);
1857                 }
1858         }
1859 }
1860
1861 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1862         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1863         .twsk_unique    = tcp_twsk_unique,
1864         .twsk_destructor = tcp_twsk_destructor,
1865 };
1866
1867 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1868 {
1869         struct ipv6_pinfo *np = inet6_sk(sk);
1870
1871         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1872 }
1873
1874 const struct inet_connection_sock_af_ops ipv6_specific = {
1875         .queue_xmit        = inet6_csk_xmit,
1876         .send_check        = tcp_v6_send_check,
1877         .rebuild_header    = inet6_sk_rebuild_header,
1878         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1879         .conn_request      = tcp_v6_conn_request,
1880         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1881         .net_header_len    = sizeof(struct ipv6hdr),
1882         .net_frag_header_len = sizeof(struct frag_hdr),
1883         .setsockopt        = ipv6_setsockopt,
1884         .getsockopt        = ipv6_getsockopt,
1885         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1886         .sockaddr_len      = sizeof(struct sockaddr_in6),
1887         .mtu_reduced       = tcp_v6_mtu_reduced,
1888 };
1889
1890 #ifdef CONFIG_TCP_MD5SIG
1891 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1892         .md5_lookup     =       tcp_v6_md5_lookup,
1893         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1894         .md5_parse      =       tcp_v6_parse_md5_keys,
1895 };
1896 #endif
1897
1898 /*
1899  *      TCP over IPv4 via INET6 API
1900  */
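/* Sockets that end up talking to an IPv4-mapped IPv6 address are switched
 * from ipv6_specific to these ops (see tcp_v6_connect() earlier in this
 * file), so further header building and checksumming follow the IPv4 paths.
 */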
1901 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1902         .queue_xmit        = ip_queue_xmit,
1903         .send_check        = tcp_v4_send_check,
1904         .rebuild_header    = inet_sk_rebuild_header,
1905         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1906         .conn_request      = tcp_v6_conn_request,
1907         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1908         .net_header_len    = sizeof(struct iphdr),
1909         .setsockopt        = ipv6_setsockopt,
1910         .getsockopt        = ipv6_getsockopt,
1911         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1912         .sockaddr_len      = sizeof(struct sockaddr_in6),
1913         .mtu_reduced       = tcp_v4_mtu_reduced,
1914 };
1915
1916 #ifdef CONFIG_TCP_MD5SIG
1917 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1918         .md5_lookup     =       tcp_v4_md5_lookup,
1919         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1920         .md5_parse      =       tcp_v6_parse_md5_keys,
1921 };
1922 #endif
1923
1924 /* NOTE: A lot of things are set to zero explicitly by the call to
1925  *       sk_alloc(), so they need not be done here.
1926  */
1927 static int tcp_v6_init_sock(struct sock *sk)
1928 {
1929         struct inet_connection_sock *icsk = inet_csk(sk);
1930
1931         tcp_init_sock(sk);
1932
1933         icsk->icsk_af_ops = &ipv6_specific;
1934
1935 #ifdef CONFIG_TCP_MD5SIG
1936         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1937 #endif
1938
1939         return 0;
1940 }
1941
1942 #ifdef CONFIG_PROC_FS
1943 /* Proc filesystem TCPv6 sock list dumping. */
1944 static void get_openreq6(struct seq_file *seq,
1945                          const struct request_sock *req, int i)
1946 {
1947         long ttd = req->rsk_timer.expires - jiffies;
1948         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1949         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1950
1951         if (ttd < 0)
1952                 ttd = 0;
1953
1954         seq_printf(seq,
1955                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1956                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1957                    i,
1958                    src->s6_addr32[0], src->s6_addr32[1],
1959                    src->s6_addr32[2], src->s6_addr32[3],
1960                    inet_rsk(req)->ir_num,
1961                    dest->s6_addr32[0], dest->s6_addr32[1],
1962                    dest->s6_addr32[2], dest->s6_addr32[3],
1963                    ntohs(inet_rsk(req)->ir_rmt_port),
1964                    TCP_SYN_RECV,
1965                    0, 0, /* could print option size, but that is af dependent. */
1966                    1,   /* timers active (only the expire timer) */
1967                    jiffies_to_clock_t(ttd),
1968                    req->num_timeout,
1969                    from_kuid_munged(seq_user_ns(seq),
1970                                     sock_i_uid(req->rsk_listener)),
1971                    0,  /* non standard timer */
1972                    0, /* open_requests have no inode */
1973                    0, req);
1974 }
1975
1976 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1977 {
1978         const struct in6_addr *dest, *src;
1979         __u16 destp, srcp;
1980         int timer_active;
1981         unsigned long timer_expires;
1982         const struct inet_sock *inet = inet_sk(sp);
1983         const struct tcp_sock *tp = tcp_sk(sp);
1984         const struct inet_connection_sock *icsk = inet_csk(sp);
1985         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1986         int rx_queue;
1987         int state;
1988
1989         dest  = &sp->sk_v6_daddr;
1990         src   = &sp->sk_v6_rcv_saddr;
1991         destp = ntohs(inet->inet_dport);
1992         srcp  = ntohs(inet->inet_sport);
1993
1994         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1995             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1996             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1997                 timer_active    = 1;
1998                 timer_expires   = icsk->icsk_timeout;
1999         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2000                 timer_active    = 4;
2001                 timer_expires   = icsk->icsk_timeout;
2002         } else if (timer_pending(&sp->sk_timer)) {
2003                 timer_active    = 2;
2004                 timer_expires   = sp->sk_timer.expires;
2005         } else {
2006                 timer_active    = 0;
2007                 timer_expires = jiffies;
2008         }
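        /* timer_active is the "tr" column of /proc/net/tcp6: 1 retransmit or
         * loss probe, 4 zero-window probe, 2 keepalive (sk_timer), 0 none;
         * TIME_WAIT sockets report 3 (see get_timewait6_sock()).
         */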
2009
2010         state = inet_sk_state_load(sp);
2011         if (state == TCP_LISTEN)
2012                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2013         else
2014                 /* Because we don't lock the socket,
2015                  * we might find a transient negative value.
2016                  */
2017                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2018                                       READ_ONCE(tp->copied_seq), 0);
2019
2020         seq_printf(seq,
2021                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2022                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2023                    i,
2024                    src->s6_addr32[0], src->s6_addr32[1],
2025                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2026                    dest->s6_addr32[0], dest->s6_addr32[1],
2027                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2028                    state,
2029                    READ_ONCE(tp->write_seq) - tp->snd_una,
2030                    rx_queue,
2031                    timer_active,
2032                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2033                    icsk->icsk_retransmits,
2034                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2035                    icsk->icsk_probes_out,
2036                    sock_i_ino(sp),
2037                    refcount_read(&sp->sk_refcnt), sp,
2038                    jiffies_to_clock_t(icsk->icsk_rto),
2039                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2040                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2041                    tp->snd_cwnd,
2042                    state == TCP_LISTEN ?
2043                         fastopenq->max_qlen :
2044                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2045                    );
2046 }
2047
2048 static void get_timewait6_sock(struct seq_file *seq,
2049                                struct inet_timewait_sock *tw, int i)
2050 {
2051         long delta = tw->tw_timer.expires - jiffies;
2052         const struct in6_addr *dest, *src;
2053         __u16 destp, srcp;
2054
2055         dest = &tw->tw_v6_daddr;
2056         src  = &tw->tw_v6_rcv_saddr;
2057         destp = ntohs(tw->tw_dport);
2058         srcp  = ntohs(tw->tw_sport);
2059
2060         seq_printf(seq,
2061                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2062                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2063                    i,
2064                    src->s6_addr32[0], src->s6_addr32[1],
2065                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2066                    dest->s6_addr32[0], dest->s6_addr32[1],
2067                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2068                    tw->tw_substate, 0, 0,
2069                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2070                    refcount_read(&tw->tw_refcnt), tw);
2071 }
2072
2073 static int tcp6_seq_show(struct seq_file *seq, void *v)
2074 {
2075         struct tcp_iter_state *st;
2076         struct sock *sk = v;
2077
2078         if (v == SEQ_START_TOKEN) {
2079                 seq_puts(seq,
2080                          "  sl  "
2081                          "local_address                         "
2082                          "remote_address                        "
2083                          "st tx_queue rx_queue tr tm->when retrnsmt"
2084                          "   uid  timeout inode\n");
2085                 goto out;
2086         }
2087         st = seq->private;
2088
2089         if (sk->sk_state == TCP_TIME_WAIT)
2090                 get_timewait6_sock(seq, v, st->num);
2091         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2092                 get_openreq6(seq, v, st->num);
2093         else
2094                 get_tcp6_sock(seq, v, st->num);
2095 out:
2096         return 0;
2097 }
2098
2099 static const struct seq_operations tcp6_seq_ops = {
2100         .show           = tcp6_seq_show,
2101         .start          = tcp_seq_start,
2102         .next           = tcp_seq_next,
2103         .stop           = tcp_seq_stop,
2104 };
2105
2106 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2107         .family         = AF_INET6,
2108 };
2109
2110 int __net_init tcp6_proc_init(struct net *net)
2111 {
2112         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2113                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2114                 return -ENOMEM;
2115         return 0;
2116 }
2117
2118 void tcp6_proc_exit(struct net *net)
2119 {
2120         remove_proc_entry("tcp6", net->proc_net);
2121 }
2122 #endif
2123
2124 struct proto tcpv6_prot = {
2125         .name                   = "TCPv6",
2126         .owner                  = THIS_MODULE,
2127         .close                  = tcp_close,
2128         .pre_connect            = tcp_v6_pre_connect,
2129         .connect                = tcp_v6_connect,
2130         .disconnect             = tcp_disconnect,
2131         .accept                 = inet_csk_accept,
2132         .ioctl                  = tcp_ioctl,
2133         .init                   = tcp_v6_init_sock,
2134         .destroy                = tcp_v4_destroy_sock,
2135         .shutdown               = tcp_shutdown,
2136         .setsockopt             = tcp_setsockopt,
2137         .getsockopt             = tcp_getsockopt,
2138         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2139         .keepalive              = tcp_set_keepalive,
2140         .recvmsg                = tcp_recvmsg,
2141         .sendmsg                = tcp_sendmsg,
2142         .sendpage               = tcp_sendpage,
2143         .backlog_rcv            = tcp_v6_do_rcv,
2144         .release_cb             = tcp_release_cb,
2145         .hash                   = inet6_hash,
2146         .unhash                 = inet_unhash,
2147         .get_port               = inet_csk_get_port,
2148         .enter_memory_pressure  = tcp_enter_memory_pressure,
2149         .leave_memory_pressure  = tcp_leave_memory_pressure,
2150         .stream_memory_free     = tcp_stream_memory_free,
2151         .sockets_allocated      = &tcp_sockets_allocated,
2152         .memory_allocated       = &tcp_memory_allocated,
2153         .memory_pressure        = &tcp_memory_pressure,
2154         .orphan_count           = &tcp_orphan_count,
2155         .sysctl_mem             = sysctl_tcp_mem,
2156         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2157         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2158         .max_header             = MAX_TCP_HEADER,
2159         .obj_size               = sizeof(struct tcp6_sock),
2160         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2161         .twsk_prot              = &tcp6_timewait_sock_ops,
2162         .rsk_prot               = &tcp6_request_sock_ops,
2163         .h.hashinfo             = &tcp_hashinfo,
2164         .no_autobind            = true,
2165         .diag_destroy           = tcp_abort,
2166 };
2167 EXPORT_SYMBOL_GPL(tcpv6_prot);
2168
2169 static const struct inet6_protocol tcpv6_protocol = {
2170         .handler        =       tcp_v6_rcv,
2171         .err_handler    =       tcp_v6_err,
2172         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2173 };
2174
2175 static struct inet_protosw tcpv6_protosw = {
2176         .type           =       SOCK_STREAM,
2177         .protocol       =       IPPROTO_TCP,
2178         .prot           =       &tcpv6_prot,
2179         .ops            =       &inet6_stream_ops,
2180         .flags          =       INET_PROTOSW_PERMANENT |
2181                                 INET_PROTOSW_ICSK,
2182 };
2183
2184 static int __net_init tcpv6_net_init(struct net *net)
2185 {
2186         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2187                                     SOCK_RAW, IPPROTO_TCP, net);
2188 }
2189
2190 static void __net_exit tcpv6_net_exit(struct net *net)
2191 {
2192         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2193 }
2194
2195 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2196 {
2197         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2198 }
2199
2200 static struct pernet_operations tcpv6_net_ops = {
2201         .init       = tcpv6_net_init,
2202         .exit       = tcpv6_net_exit,
2203         .exit_batch = tcpv6_net_exit_batch,
2204 };
2205
2206 int __init tcpv6_init(void)
2207 {
2208         int ret;
2209
2210         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2211         if (ret)
2212                 goto out;
2213
2214         /* register inet6 protocol */
2215         ret = inet6_register_protosw(&tcpv6_protosw);
2216         if (ret)
2217                 goto out_tcpv6_protocol;
2218
2219         ret = register_pernet_subsys(&tcpv6_net_ops);
2220         if (ret)
2221                 goto out_tcpv6_protosw;
2222
2223         ret = mptcpv6_init();
2224         if (ret)
2225                 goto out_tcpv6_pernet_subsys;
2226
2227 out:
2228         return ret;
2229
2230 out_tcpv6_pernet_subsys:
2231         unregister_pernet_subsys(&tcpv6_net_ops);
2232 out_tcpv6_protosw:
2233         inet6_unregister_protosw(&tcpv6_protosw);
2234 out_tcpv6_protocol:
2235         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2236         goto out;
2237 }
2238
2239 void tcpv6_exit(void)
2240 {
2241         unregister_pernet_subsys(&tcpv6_net_ops);
2242         inet6_unregister_protosw(&tcpv6_protosw);
2243         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2244 }