net/ipv6/tcp_ipv6.c (GNU Linux-libre 5.19-rc6-gnu)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      TCP over IPv6
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on:
10  *      linux/net/ipv4/tcp.c
11  *      linux/net/ipv4/tcp_input.c
12  *      linux/net/ipv4/tcp_output.c
13  *
14  *      Fixes:
15  *      Hideaki YOSHIFUJI       :       sin6_scope_id support
16  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
17  *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
18  *                                      to a single port at the same time.
19  *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
20  */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73                                       struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84                                                    const struct in6_addr *addr,
85                                                    int l3index)
86 {
87         return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 private data (ipv6_pinfo) of a given tcp socket.
92  * It can be used in the TCP stack instead of inet6_sk(sk).
93  * This avoids a dereference and allows compiler optimizations.
94  * It is a specialized version of inet6_sk_generic().
95  */
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
98         unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99
100         return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
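/* Cache the received route on the socket for the ESTABLISHED fast path: a
 * later packet whose incoming interface and route cookie still match can skip
 * a full route lookup (see the dst check in tcp_v6_do_rcv() below).
 */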
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105         struct dst_entry *dst = skb_dst(skb);
106
107         if (dst && dst_hold_safe(dst)) {
108                 const struct rt6_info *rt = (const struct rt6_info *)dst;
109
110                 rcu_assign_pointer(sk->sk_rx_dst, dst);
111                 sk->sk_rx_dst_ifindex = skb->skb_iif;
112                 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113         }
114 }
115
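/* The initial sequence number and timestamp offset for a new connection are
 * derived from the address/port 4-tuple with a keyed hash (secure_tcpv6_seq /
 * secure_tcpv6_ts_off), so they are hard to predict off-path.
 */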
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118         return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119                                 ipv6_hdr(skb)->saddr.s6_addr32,
120                                 tcp_hdr(skb)->dest,
121                                 tcp_hdr(skb)->source);
122 }
123
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126         return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127                                    ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
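/* Called before the actual connect: gives BPF_CGROUP_INET6_CONNECT programs a
 * chance to inspect or rewrite the destination address while the socket lock
 * is held.
 */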
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131                               int addr_len)
132 {
133         /* This check is replicated from tcp_v6_connect() and is intended to
134          * prevent the BPF program called below from accessing bytes that are
135          * out of the bounds specified by the user in addr_len.
136          */
137         if (addr_len < SIN6_LEN_RFC2133)
138                 return -EINVAL;
139
140         sock_owned_by_me(sk);
141
142         return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
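/* Connection setup for an AF_INET6 TCP socket: validate the destination,
 * resolve flow labels and link-local scope, fall back to tcp_v4_connect() for
 * v4-mapped destinations, route the flow, bind a local port via
 * inet6_hash_connect(), and finally send the SYN with tcp_connect().
 */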
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146                           int addr_len)
147 {
148         struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149         struct inet_sock *inet = inet_sk(sk);
150         struct inet_connection_sock *icsk = inet_csk(sk);
151         struct inet_timewait_death_row *tcp_death_row;
152         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153         struct tcp_sock *tp = tcp_sk(sk);
154         struct in6_addr *saddr = NULL, *final_p, final;
155         struct ipv6_txoptions *opt;
156         struct flowi6 fl6;
157         struct dst_entry *dst;
158         int addr_type;
159         int err;
160
161         if (addr_len < SIN6_LEN_RFC2133)
162                 return -EINVAL;
163
164         if (usin->sin6_family != AF_INET6)
165                 return -EAFNOSUPPORT;
166
167         memset(&fl6, 0, sizeof(fl6));
168
169         if (np->sndflow) {
170                 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171                 IP6_ECN_flow_init(fl6.flowlabel);
172                 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173                         struct ip6_flowlabel *flowlabel;
174                         flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175                         if (IS_ERR(flowlabel))
176                                 return -EINVAL;
177                         fl6_sock_release(flowlabel);
178                 }
179         }
180
181         /*
182          *      connect() to INADDR_ANY means loopback (BSD'ism).
183          */
184
185         if (ipv6_addr_any(&usin->sin6_addr)) {
186                 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187                         ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188                                                &usin->sin6_addr);
189                 else
190                         usin->sin6_addr = in6addr_loopback;
191         }
192
193         addr_type = ipv6_addr_type(&usin->sin6_addr);
194
195         if (addr_type & IPV6_ADDR_MULTICAST)
196                 return -ENETUNREACH;
197
198         if (addr_type&IPV6_ADDR_LINKLOCAL) {
199                 if (addr_len >= sizeof(struct sockaddr_in6) &&
200                     usin->sin6_scope_id) {
201                         /* If an interface was set while binding, the
202                          * indices must coincide.
203                          */
204                         if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205                                 return -EINVAL;
206
207                         sk->sk_bound_dev_if = usin->sin6_scope_id;
208                 }
209
210                 /* Connecting to a link-local address requires an interface */
211                 if (!sk->sk_bound_dev_if)
212                         return -EINVAL;
213         }
214
215         if (tp->rx_opt.ts_recent_stamp &&
216             !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217                 tp->rx_opt.ts_recent = 0;
218                 tp->rx_opt.ts_recent_stamp = 0;
219                 WRITE_ONCE(tp->write_seq, 0);
220         }
221
222         sk->sk_v6_daddr = usin->sin6_addr;
223         np->flow_label = fl6.flowlabel;
224
225         /*
226          *      TCP over IPv4
227          */
228
229         if (addr_type & IPV6_ADDR_MAPPED) {
230                 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231                 struct sockaddr_in sin;
232
233                 if (ipv6_only_sock(sk))
234                         return -ENETUNREACH;
235
236                 sin.sin_family = AF_INET;
237                 sin.sin_port = usin->sin6_port;
238                 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239
240                 icsk->icsk_af_ops = &ipv6_mapped;
241                 if (sk_is_mptcp(sk))
242                         mptcpv6_handle_mapped(sk, true);
243                 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245                 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247
248                 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249
250                 if (err) {
251                         icsk->icsk_ext_hdr_len = exthdrlen;
252                         icsk->icsk_af_ops = &ipv6_specific;
253                         if (sk_is_mptcp(sk))
254                                 mptcpv6_handle_mapped(sk, false);
255                         sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257                         tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259                         goto failure;
260                 }
261                 np->saddr = sk->sk_v6_rcv_saddr;
262
263                 return err;
264         }
265
266         if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267                 saddr = &sk->sk_v6_rcv_saddr;
268
269         fl6.flowi6_proto = IPPROTO_TCP;
270         fl6.daddr = sk->sk_v6_daddr;
271         fl6.saddr = saddr ? *saddr : np->saddr;
272         fl6.flowi6_oif = sk->sk_bound_dev_if;
273         fl6.flowi6_mark = sk->sk_mark;
274         fl6.fl6_dport = usin->sin6_port;
275         fl6.fl6_sport = inet->inet_sport;
276         fl6.flowi6_uid = sk->sk_uid;
277
278         opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279         final_p = fl6_update_dst(&fl6, opt, &final);
280
281         security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282
283         dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284         if (IS_ERR(dst)) {
285                 err = PTR_ERR(dst);
286                 goto failure;
287         }
288
289         if (!saddr) {
290                 saddr = &fl6.saddr;
291                 sk->sk_v6_rcv_saddr = *saddr;
292         }
293
294         /* set the source address */
295         np->saddr = *saddr;
296         inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297
298         sk->sk_gso_type = SKB_GSO_TCPV6;
299         ip6_dst_store(sk, dst, NULL, NULL);
300
301         icsk->icsk_ext_hdr_len = 0;
302         if (opt)
303                 icsk->icsk_ext_hdr_len = opt->opt_flen +
304                                          opt->opt_nflen;
305
306         tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307
308         inet->inet_dport = usin->sin6_port;
309
310         tcp_set_state(sk, TCP_SYN_SENT);
311         tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
312         err = inet6_hash_connect(tcp_death_row, sk);
313         if (err)
314                 goto late_failure;
315
316         sk_set_txhash(sk);
317
318         if (likely(!tp->repair)) {
319                 if (!tp->write_seq)
320                         WRITE_ONCE(tp->write_seq,
321                                    secure_tcpv6_seq(np->saddr.s6_addr32,
322                                                     sk->sk_v6_daddr.s6_addr32,
323                                                     inet->inet_sport,
324                                                     inet->inet_dport));
325                 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326                                                    np->saddr.s6_addr32,
327                                                    sk->sk_v6_daddr.s6_addr32);
328         }
329
330         if (tcp_fastopen_defer_connect(sk, &err))
331                 return err;
332         if (err)
333                 goto late_failure;
334
335         err = tcp_connect(sk);
336         if (err)
337                 goto late_failure;
338
339         return 0;
340
341 late_failure:
342         tcp_set_state(sk, TCP_CLOSE);
343 failure:
344         inet->inet_dport = 0;
345         sk->sk_route_caps = 0;
346         return err;
347 }
348
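/* Deferred handler for ICMPV6_PKT_TOOBIG: re-checks the PMTU stored in
 * tp->mtu_info, updates the cached route, and shrinks the MSS, retransmitting
 * in-flight data that no longer fits.
 */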
349 static void tcp_v6_mtu_reduced(struct sock *sk)
350 {
351         struct dst_entry *dst;
352         u32 mtu;
353
354         if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355                 return;
356
357         mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358
359         /* Drop requests trying to increase our current mss.
360          * The check done in __ip6_rt_update_pmtu() is too late.
361          */
362         if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
363                 return;
364
365         dst = inet6_csk_update_pmtu(sk, mtu);
366         if (!dst)
367                 return;
368
369         if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
370                 tcp_sync_mss(sk, dst_mtu(dst));
371                 tcp_simple_retransmit(sk);
372         }
373 }
374
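/* ICMPv6 error handler for TCP: looks up the socket the error refers to, then
 * dispatches on the ICMP type (redirect, packet-too-big, hard errors) and on
 * the TCP state of the socket.
 */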
375 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
376                 u8 type, u8 code, int offset, __be32 info)
377 {
378         const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
379         const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
380         struct net *net = dev_net(skb->dev);
381         struct request_sock *fastopen;
382         struct ipv6_pinfo *np;
383         struct tcp_sock *tp;
384         __u32 seq, snd_una;
385         struct sock *sk;
386         bool fatal;
387         int err;
388
389         sk = __inet6_lookup_established(net, &tcp_hashinfo,
390                                         &hdr->daddr, th->dest,
391                                         &hdr->saddr, ntohs(th->source),
392                                         skb->dev->ifindex, inet6_sdif(skb));
393
394         if (!sk) {
395                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
396                                   ICMP6_MIB_INERRORS);
397                 return -ENOENT;
398         }
399
400         if (sk->sk_state == TCP_TIME_WAIT) {
401                 inet_twsk_put(inet_twsk(sk));
402                 return 0;
403         }
404         seq = ntohl(th->seq);
405         fatal = icmpv6_err_convert(type, code, &err);
406         if (sk->sk_state == TCP_NEW_SYN_RECV) {
407                 tcp_req_err(sk, seq, fatal);
408                 return 0;
409         }
410
411         bh_lock_sock(sk);
412         if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
413                 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
414
415         if (sk->sk_state == TCP_CLOSE)
416                 goto out;
417
418         if (static_branch_unlikely(&ip6_min_hopcount)) {
419                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
420                 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
421                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422                         goto out;
423                 }
424         }
425
426         tp = tcp_sk(sk);
427         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
428         fastopen = rcu_dereference(tp->fastopen_rsk);
429         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
430         if (sk->sk_state != TCP_LISTEN &&
431             !between(seq, snd_una, tp->snd_nxt)) {
432                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433                 goto out;
434         }
435
436         np = tcp_inet6_sk(sk);
437
438         if (type == NDISC_REDIRECT) {
439                 if (!sock_owned_by_user(sk)) {
440                         struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441
442                         if (dst)
443                                 dst->ops->redirect(dst, sk, skb);
444                 }
445                 goto out;
446         }
447
448         if (type == ICMPV6_PKT_TOOBIG) {
449                 u32 mtu = ntohl(info);
450
451                 /* We are not interested in TCP_LISTEN and open_requests
452                  * (SYN-ACKs sent out by Linux are always < 576 bytes, so
453                  * they should go through unfragmented).
454                  */
455                 if (sk->sk_state == TCP_LISTEN)
456                         goto out;
457
458                 if (!ip6_sk_accept_pmtu(sk))
459                         goto out;
460
461                 if (mtu < IPV6_MIN_MTU)
462                         goto out;
463
464                 WRITE_ONCE(tp->mtu_info, mtu);
465
466                 if (!sock_owned_by_user(sk))
467                         tcp_v6_mtu_reduced(sk);
468                 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469                                            &sk->sk_tsq_flags))
470                         sock_hold(sk);
471                 goto out;
472         }
473
474
475         /* Might be for a request_sock */
476         switch (sk->sk_state) {
477         case TCP_SYN_SENT:
478         case TCP_SYN_RECV:
479                 /* Only in fast or simultaneous open. If a fast open socket is
480                  * already accepted it is treated as a connected one below.
481                  */
482                 if (fastopen && !fastopen->sk)
483                         break;
484
485                 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486
487                 if (!sock_owned_by_user(sk)) {
488                         sk->sk_err = err;
489                         sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */
490
491                         tcp_done(sk);
492                 } else
493                         sk->sk_err_soft = err;
494                 goto out;
495         case TCP_LISTEN:
496                 break;
497         default:
498                 /* Check if this ICMP message allows reverting the
499                  * retransmission backoff (see RFC 6069).
500                  */
501                 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502                     code == ICMPV6_NOROUTE)
503                         tcp_ld_RTO_revert(sk, seq);
504         }
505
506         if (!sock_owned_by_user(sk) && np->recverr) {
507                 sk->sk_err = err;
508                 sk_error_report(sk);
509         } else
510                 sk->sk_err_soft = err;
511
512 out:
513         bh_unlock_sock(sk);
514         sock_put(sk);
515         return 0;
516 }
517
518
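/* Build and transmit a SYN-ACK for a request sock, routing it first if the
 * caller did not already supply a dst. The traffic class may mirror the TOS
 * of the SYN when the tcp_reflect_tos sysctl is enabled.
 */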
519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520                               struct flowi *fl,
521                               struct request_sock *req,
522                               struct tcp_fastopen_cookie *foc,
523                               enum tcp_synack_type synack_type,
524                               struct sk_buff *syn_skb)
525 {
526         struct inet_request_sock *ireq = inet_rsk(req);
527         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528         struct ipv6_txoptions *opt;
529         struct flowi6 *fl6 = &fl->u.ip6;
530         struct sk_buff *skb;
531         int err = -ENOMEM;
532         u8 tclass;
533
534         /* First, grab a route. */
535         if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536                                                IPPROTO_TCP)) == NULL)
537                 goto done;
538
539         skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540
541         if (skb) {
542                 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543                                     &ireq->ir_v6_rmt_addr);
544
545                 fl6->daddr = ireq->ir_v6_rmt_addr;
546                 if (np->repflow && ireq->pktopts)
547                         fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548
549                 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
550                                 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551                                 (np->tclass & INET_ECN_MASK) :
552                                 np->tclass;
553
554                 if (!INET_ECN_is_capable(tclass) &&
555                     tcp_bpf_ca_needs_ecn((struct sock *)req))
556                         tclass |= INET_ECN_ECT_0;
557
558                 rcu_read_lock();
559                 opt = ireq->ipv6_opt;
560                 if (!opt)
561                         opt = rcu_dereference(np->opt);
562                 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563                                tclass, sk->sk_priority);
564                 rcu_read_unlock();
565                 err = net_xmit_eval(err);
566         }
567
568 done:
569         return err;
570 }
571
572
573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575         kfree(inet_rsk(req)->ipv6_opt);
576         consume_skb(inet_rsk(req)->pktopts);
577 }
578
579 #ifdef CONFIG_TCP_MD5SIG
580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581                                                    const struct in6_addr *addr,
582                                                    int l3index)
583 {
584         return tcp_md5_do_lookup(sk, l3index,
585                                  (union tcp_md5_addr *)addr, AF_INET6);
586 }
587
588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589                                                 const struct sock *addr_sk)
590 {
591         int l3index;
592
593         l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594                                                  addr_sk->sk_bound_dev_if);
595         return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596                                     l3index);
597 }
598
599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600                                  sockptr_t optval, int optlen)
601 {
602         struct tcp_md5sig cmd;
603         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604         int l3index = 0;
605         u8 prefixlen;
606         u8 flags;
607
608         if (optlen < sizeof(cmd))
609                 return -EINVAL;
610
611         if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612                 return -EFAULT;
613
614         if (sin6->sin6_family != AF_INET6)
615                 return -EINVAL;
616
617         flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
618
619         if (optname == TCP_MD5SIG_EXT &&
620             cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
621                 prefixlen = cmd.tcpm_prefixlen;
622                 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
623                                         prefixlen > 32))
624                         return -EINVAL;
625         } else {
626                 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
627         }
628
629         if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
630             cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
631                 struct net_device *dev;
632
633                 rcu_read_lock();
634                 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
635                 if (dev && netif_is_l3_master(dev))
636                         l3index = dev->ifindex;
637                 rcu_read_unlock();
638
639                 /* It is OK to reference dev set/not set outside of RCU;
640                  * right now the device MUST be an L3 master.
641                  */
642                 if (!dev || !l3index)
643                         return -EINVAL;
644         }
645
646         if (!cmd.tcpm_keylen) {
647                 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
648                         return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
649                                               AF_INET, prefixlen,
650                                               l3index, flags);
651                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
652                                       AF_INET6, prefixlen, l3index, flags);
653         }
654
655         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
656                 return -EINVAL;
657
658         if (ipv6_addr_v4mapped(&sin6->sin6_addr))
659                 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
660                                       AF_INET, prefixlen, l3index, flags,
661                                       cmd.tcpm_key, cmd.tcpm_keylen,
662                                       GFP_KERNEL);
663
664         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
665                               AF_INET6, prefixlen, l3index, flags,
666                               cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
667 }
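
/* Illustrative userspace sketch (not part of this file): installing an MD5
 * key for a peer with the plain TCP_MD5SIG option, which the parser above
 * handles. TCP_MD5SIG_EXT additionally honours tcpm_prefixlen/tcpm_ifindex.
 *
 *      struct tcp_md5sig md5 = { };
 *      struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *      a->sin6_family = AF_INET6;
 *      inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *      md5.tcpm_keylen = 6;
 *      memcpy(md5.tcpm_key, "secret", 6);
 *      setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */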
668
669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
670                                    const struct in6_addr *daddr,
671                                    const struct in6_addr *saddr,
672                                    const struct tcphdr *th, int nbytes)
673 {
674         struct tcp6_pseudohdr *bp;
675         struct scatterlist sg;
676         struct tcphdr *_th;
677
678         bp = hp->scratch;
679         /* 1. TCP pseudo-header (RFC2460) */
680         bp->saddr = *saddr;
681         bp->daddr = *daddr;
682         bp->protocol = cpu_to_be32(IPPROTO_TCP);
683         bp->len = cpu_to_be32(nbytes);
684
685         _th = (struct tcphdr *)(bp + 1);
686         memcpy(_th, th, sizeof(*th));
687         _th->check = 0;
688
689         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
690         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
691                                 sizeof(*bp) + sizeof(*th));
692         return crypto_ahash_update(hp->md5_req);
693 }
694
695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
696                                const struct in6_addr *daddr, struct in6_addr *saddr,
697                                const struct tcphdr *th)
698 {
699         struct tcp_md5sig_pool *hp;
700         struct ahash_request *req;
701
702         hp = tcp_get_md5sig_pool();
703         if (!hp)
704                 goto clear_hash_noput;
705         req = hp->md5_req;
706
707         if (crypto_ahash_init(req))
708                 goto clear_hash;
709         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
710                 goto clear_hash;
711         if (tcp_md5_hash_key(hp, key))
712                 goto clear_hash;
713         ahash_request_set_crypt(req, NULL, md5_hash, 0);
714         if (crypto_ahash_final(req))
715                 goto clear_hash;
716
717         tcp_put_md5sig_pool();
718         return 0;
719
720 clear_hash:
721         tcp_put_md5sig_pool();
722 clear_hash_noput:
723         memset(md5_hash, 0, 16);
724         return 1;
725 }
726
727 static int tcp_v6_md5_hash_skb(char *md5_hash,
728                                const struct tcp_md5sig_key *key,
729                                const struct sock *sk,
730                                const struct sk_buff *skb)
731 {
732         const struct in6_addr *saddr, *daddr;
733         struct tcp_md5sig_pool *hp;
734         struct ahash_request *req;
735         const struct tcphdr *th = tcp_hdr(skb);
736
737         if (sk) { /* valid for establish/request sockets */
738                 saddr = &sk->sk_v6_rcv_saddr;
739                 daddr = &sk->sk_v6_daddr;
740         } else {
741                 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
742                 saddr = &ip6h->saddr;
743                 daddr = &ip6h->daddr;
744         }
745
746         hp = tcp_get_md5sig_pool();
747         if (!hp)
748                 goto clear_hash_noput;
749         req = hp->md5_req;
750
751         if (crypto_ahash_init(req))
752                 goto clear_hash;
753
754         if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
755                 goto clear_hash;
756         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
757                 goto clear_hash;
758         if (tcp_md5_hash_key(hp, key))
759                 goto clear_hash;
760         ahash_request_set_crypt(req, NULL, md5_hash, 0);
761         if (crypto_ahash_final(req))
762                 goto clear_hash;
763
764         tcp_put_md5sig_pool();
765         return 0;
766
767 clear_hash:
768         tcp_put_md5sig_pool();
769 clear_hash_noput:
770         memset(md5_hash, 0, 16);
771         return 1;
772 }
773
774 #endif
775
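/* Fill the IPv6-specific parts of a new request sock from the incoming SYN:
 * the address pair, the interface for link-local peers, and a reference to
 * the SYN itself when the listener asked for packet options.
 */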
776 static void tcp_v6_init_req(struct request_sock *req,
777                             const struct sock *sk_listener,
778                             struct sk_buff *skb)
779 {
780         bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
781         struct inet_request_sock *ireq = inet_rsk(req);
782         const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
783
784         ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
785         ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
786
787         /* So that link locals have meaning */
788         if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
789             ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
790                 ireq->ir_iif = tcp_v6_iif(skb);
791
792         if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
793             (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
794              np->rxopt.bits.rxinfo ||
795              np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
796              np->rxopt.bits.rxohlim || np->repflow)) {
797                 refcount_inc(&skb->users);
798                 ireq->pktopts = skb;
799         }
800 }
801
802 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
803                                           struct sk_buff *skb,
804                                           struct flowi *fl,
805                                           struct request_sock *req)
806 {
807         tcp_v6_init_req(req, sk, skb);
808
809         if (security_inet_conn_request(sk, skb, req))
810                 return NULL;
811
812         return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
813 }
814
815 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
816         .family         =       AF_INET6,
817         .obj_size       =       sizeof(struct tcp6_request_sock),
818         .rtx_syn_ack    =       tcp_rtx_synack,
819         .send_ack       =       tcp_v6_reqsk_send_ack,
820         .destructor     =       tcp_v6_reqsk_destructor,
821         .send_reset     =       tcp_v6_send_reset,
822         .syn_ack_timeout =      tcp_syn_ack_timeout,
823 };
824
825 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
826         .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
827                                 sizeof(struct ipv6hdr),
828 #ifdef CONFIG_TCP_MD5SIG
829         .req_md5_lookup =       tcp_v6_md5_lookup,
830         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
831 #endif
832 #ifdef CONFIG_SYN_COOKIES
833         .cookie_init_seq =      cookie_v6_init_sequence,
834 #endif
835         .route_req      =       tcp_v6_route_req,
836         .init_seq       =       tcp_v6_init_seq,
837         .init_ts_off    =       tcp_v6_init_ts_off,
838         .send_synack    =       tcp_v6_send_synack,
839 };
840
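/* Common worker for stateless replies (RST and ACK): builds a minimal TCP
 * header with optional timestamp, MD5 and MPTCP-reset options on a fresh skb
 * and transmits it through the per-netns IPv6 control socket.
 */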
841 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
842                                  u32 ack, u32 win, u32 tsval, u32 tsecr,
843                                  int oif, struct tcp_md5sig_key *key, int rst,
844                                  u8 tclass, __be32 label, u32 priority)
845 {
846         const struct tcphdr *th = tcp_hdr(skb);
847         struct tcphdr *t1;
848         struct sk_buff *buff;
849         struct flowi6 fl6;
850         struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
851         struct sock *ctl_sk = net->ipv6.tcp_sk;
852         unsigned int tot_len = sizeof(struct tcphdr);
853         __be32 mrst = 0, *topt;
854         struct dst_entry *dst;
855         __u32 mark = 0;
856
857         if (tsecr)
858                 tot_len += TCPOLEN_TSTAMP_ALIGNED;
859 #ifdef CONFIG_TCP_MD5SIG
860         if (key)
861                 tot_len += TCPOLEN_MD5SIG_ALIGNED;
862 #endif
863
864 #ifdef CONFIG_MPTCP
865         if (rst && !key) {
866                 mrst = mptcp_reset_option(skb);
867
868                 if (mrst)
869                         tot_len += sizeof(__be32);
870         }
871 #endif
872
873         buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
874         if (!buff)
875                 return;
876
877         skb_reserve(buff, MAX_TCP_HEADER);
878
879         t1 = skb_push(buff, tot_len);
880         skb_reset_transport_header(buff);
881
882         /* Swap the send and the receive. */
883         memset(t1, 0, sizeof(*t1));
884         t1->dest = th->source;
885         t1->source = th->dest;
886         t1->doff = tot_len / 4;
887         t1->seq = htonl(seq);
888         t1->ack_seq = htonl(ack);
889         t1->ack = !rst || !th->ack;
890         t1->rst = rst;
891         t1->window = htons(win);
892
893         topt = (__be32 *)(t1 + 1);
894
895         if (tsecr) {
896                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
897                                 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
898                 *topt++ = htonl(tsval);
899                 *topt++ = htonl(tsecr);
900         }
901
902         if (mrst)
903                 *topt++ = mrst;
904
905 #ifdef CONFIG_TCP_MD5SIG
906         if (key) {
907                 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
908                                 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
909                 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
910                                     &ipv6_hdr(skb)->saddr,
911                                     &ipv6_hdr(skb)->daddr, t1);
912         }
913 #endif
914
915         memset(&fl6, 0, sizeof(fl6));
916         fl6.daddr = ipv6_hdr(skb)->saddr;
917         fl6.saddr = ipv6_hdr(skb)->daddr;
918         fl6.flowlabel = label;
919
920         buff->ip_summed = CHECKSUM_PARTIAL;
921
922         __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
923
924         fl6.flowi6_proto = IPPROTO_TCP;
925         if (rt6_need_strict(&fl6.daddr) && !oif)
926                 fl6.flowi6_oif = tcp_v6_iif(skb);
927         else {
928                 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
929                         oif = skb->skb_iif;
930
931                 fl6.flowi6_oif = oif;
932         }
933
934         if (sk) {
935                 if (sk->sk_state == TCP_TIME_WAIT) {
936                         mark = inet_twsk(sk)->tw_mark;
937                         /* autoflowlabel relies on buff->hash */
938                         skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
939                                      PKT_HASH_TYPE_L4);
940                 } else {
941                         mark = sk->sk_mark;
942                 }
943                 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
944         }
945         fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
946         fl6.fl6_dport = t1->dest;
947         fl6.fl6_sport = t1->source;
948         fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
949         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
950
951         /* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
952          * the underlying function will use it to retrieve the network
953          * namespace.
954          */
955         dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
956         if (!IS_ERR(dst)) {
957                 skb_dst_set(buff, dst);
958                 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
959                          tclass & ~INET_ECN_MASK, priority);
960                 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
961                 if (rst)
962                         TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
963                 return;
964         }
965
966         kfree_skb(buff);
967 }
968
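/* Send a RST in response to skb. sk may be NULL (no matching socket), a full
 * socket, or a timewait/request sock; it is only used to pick the outgoing
 * interface, flow label, priority and (if any) the MD5 key.
 */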
969 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
970 {
971         const struct tcphdr *th = tcp_hdr(skb);
972         struct ipv6hdr *ipv6h = ipv6_hdr(skb);
973         u32 seq = 0, ack_seq = 0;
974         struct tcp_md5sig_key *key = NULL;
975 #ifdef CONFIG_TCP_MD5SIG
976         const __u8 *hash_location = NULL;
977         unsigned char newhash[16];
978         int genhash;
979         struct sock *sk1 = NULL;
980 #endif
981         __be32 label = 0;
982         u32 priority = 0;
983         struct net *net;
984         int oif = 0;
985
986         if (th->rst)
987                 return;
988
989         /* If sk is not NULL, it means we did a successful lookup and the
990          * incoming route had to be correct. prequeue might have dropped our dst.
991          */
992         if (!sk && !ipv6_unicast_destination(skb))
993                 return;
994
995         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
996 #ifdef CONFIG_TCP_MD5SIG
997         rcu_read_lock();
998         hash_location = tcp_parse_md5sig_option(th);
999         if (sk && sk_fullsock(sk)) {
1000                 int l3index;
1001
1002                 /* If sdif is set, the packet ingressed via a device
1003                  * in an L3 domain, and inet_iif is set to it.
1004                  */
1005                 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1006                 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1007         } else if (hash_location) {
1008                 int dif = tcp_v6_iif_l3_slave(skb);
1009                 int sdif = tcp_v6_sdif(skb);
1010                 int l3index;
1011
1012                 /*
1013                  * The active side is lost. Try to find the listening socket
1014                  * through the source port, and then find the MD5 key through
1015                  * the listening socket. We do not lose security here:
1016                  * the incoming packet is checked against the MD5 hash of the
1017                  * key found; no RST is generated if the hashes don't match.
1018                  */
1019                 sk1 = inet6_lookup_listener(net,
1020                                            &tcp_hashinfo, NULL, 0,
1021                                            &ipv6h->saddr,
1022                                            th->source, &ipv6h->daddr,
1023                                            ntohs(th->source), dif, sdif);
1024                 if (!sk1)
1025                         goto out;
1026
1027                 /* If sdif is set, the packet ingressed via a device
1028                  * in an L3 domain, and dif is set to it.
1029                  */
1030                 l3index = tcp_v6_sdif(skb) ? dif : 0;
1031
1032                 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1033                 if (!key)
1034                         goto out;
1035
1036                 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1037                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1038                         goto out;
1039         }
1040 #endif
1041
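        /* Per RFC 793: if the offending segment carried an ACK, the reset
         * takes its sequence number from that ACK; otherwise the reset has
         * SEQ 0 and acknowledges everything the segment occupied.
         */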
1042         if (th->ack)
1043                 seq = ntohl(th->ack_seq);
1044         else
1045                 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1046                           (th->doff << 2);
1047
1048         if (sk) {
1049                 oif = sk->sk_bound_dev_if;
1050                 if (sk_fullsock(sk)) {
1051                         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1052
1053                         trace_tcp_send_reset(sk, skb);
1054                         if (np->repflow)
1055                                 label = ip6_flowlabel(ipv6h);
1056                         priority = sk->sk_priority;
1057                 }
1058                 if (sk->sk_state == TCP_TIME_WAIT) {
1059                         label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1060                         priority = inet_twsk(sk)->tw_priority;
1061                 }
1062         } else {
1063                 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1064                         label = ip6_flowlabel(ipv6h);
1065         }
1066
1067         tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1068                              ipv6_get_dsfield(ipv6h), label, priority);
1069
1070 #ifdef CONFIG_TCP_MD5SIG
1071 out:
1072         rcu_read_unlock();
1073 #endif
1074 }
1075
1076 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1077                             u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1078                             struct tcp_md5sig_key *key, u8 tclass,
1079                             __be32 label, u32 priority)
1080 {
1081         tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1082                              tclass, label, priority);
1083 }
1084
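/* Final ACK on behalf of a TIME_WAIT socket, e.g. in reply to a stray
 * retransmission; all the state needed comes from the timewait sock itself.
 */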
1085 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1086 {
1087         struct inet_timewait_sock *tw = inet_twsk(sk);
1088         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1089
1090         tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1091                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1092                         tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1093                         tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1094                         tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
1095
1096         inet_twsk_put(tw);
1097 }
1098
1099 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1100                                   struct request_sock *req)
1101 {
1102         int l3index;
1103
1104         l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1105
1106         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1107          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1108          */
1109         /* RFC 7323 2.3
1110          * The window field (SEG.WND) of every outgoing segment, with the
1111          * exception of <SYN> segments, MUST be right-shifted by
1112          * Rcv.Wind.Shift bits:
1113          */
1114         tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1115                         tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1116                         tcp_rsk(req)->rcv_nxt,
1117                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1118                         tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1119                         req->ts_recent, sk->sk_bound_dev_if,
1120                         tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1121                         ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
1122 }
1123
1124
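/* On a listener, a non-SYN segment may be the ACK completing a syncookie
 * handshake; cookie_v6_check() validates it and, if genuine, creates the
 * child socket that the cookie encoded.
 */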
1125 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1126 {
1127 #ifdef CONFIG_SYN_COOKIES
1128         const struct tcphdr *th = tcp_hdr(skb);
1129
1130         if (!th->syn)
1131                 sk = cookie_v6_check(sk, skb);
1132 #endif
1133         return sk;
1134 }
1135
1136 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1137                          struct tcphdr *th, u32 *cookie)
1138 {
1139         u16 mss = 0;
1140 #ifdef CONFIG_SYN_COOKIES
1141         mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1142                                     &tcp_request_sock_ipv6_ops, sk, th);
1143         if (mss) {
1144                 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1145                 tcp_synq_overflow(sk);
1146         }
1147 #endif
1148         return mss;
1149 }
1150
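/* Entry point for an incoming SYN on a listener. SYNs from v4-mapped source
 * addresses are dropped here; real IPv4 SYNs on a dual-stack socket were
 * already diverted to tcp_v4_conn_request() above.
 */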
1151 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1152 {
1153         if (skb->protocol == htons(ETH_P_IP))
1154                 return tcp_v4_conn_request(sk, skb);
1155
1156         if (!ipv6_unicast_destination(skb))
1157                 goto drop;
1158
1159         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1160                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1161                 return 0;
1162         }
1163
1164         return tcp_conn_request(&tcp6_request_sock_ops,
1165                                 &tcp_request_sock_ipv6_ops, sk, skb);
1166
1167 drop:
1168         tcp_listendrop(sk);
1169         return 0; /* don't send reset */
1170 }
1171
1172 static void tcp_v6_restore_cb(struct sk_buff *skb)
1173 {
1174         /* We need to move header back to the beginning if xfrm6_policy_check()
1175          * and tcp_v6_fill_cb() are going to be called again.
1176          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1177          */
1178         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1179                 sizeof(struct inet6_skb_parm));
1180 }
1181
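/* Create the child socket once the 3WHS completes. For a SYN that arrived
 * over IPv4 on a dual-stack listener, the child is built by the IPv4 code and
 * then patched to look like a v4-mapped IPv6 socket.
 */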
1182 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1183                                          struct request_sock *req,
1184                                          struct dst_entry *dst,
1185                                          struct request_sock *req_unhash,
1186                                          bool *own_req)
1187 {
1188         struct inet_request_sock *ireq;
1189         struct ipv6_pinfo *newnp;
1190         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1191         struct ipv6_txoptions *opt;
1192         struct inet_sock *newinet;
1193         bool found_dup_sk = false;
1194         struct tcp_sock *newtp;
1195         struct sock *newsk;
1196 #ifdef CONFIG_TCP_MD5SIG
1197         struct tcp_md5sig_key *key;
1198         int l3index;
1199 #endif
1200         struct flowi6 fl6;
1201
1202         if (skb->protocol == htons(ETH_P_IP)) {
1203                 /*
1204                  *      v6 mapped
1205                  */
1206
1207                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1208                                              req_unhash, own_req);
1209
1210                 if (!newsk)
1211                         return NULL;
1212
1213                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1214
1215                 newnp = tcp_inet6_sk(newsk);
1216                 newtp = tcp_sk(newsk);
1217
1218                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1219
1220                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1221
1222                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1223                 if (sk_is_mptcp(newsk))
1224                         mptcpv6_handle_mapped(newsk, true);
1225                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1226 #ifdef CONFIG_TCP_MD5SIG
1227                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1228 #endif
1229
1230                 newnp->ipv6_mc_list = NULL;
1231                 newnp->ipv6_ac_list = NULL;
1232                 newnp->ipv6_fl_list = NULL;
1233                 newnp->pktoptions  = NULL;
1234                 newnp->opt         = NULL;
1235                 newnp->mcast_oif   = inet_iif(skb);
1236                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1237                 newnp->rcv_flowinfo = 0;
1238                 if (np->repflow)
1239                         newnp->flow_label = 0;
1240
1241                 /*
1242                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1243                  * here, tcp_create_openreq_child now does this for us, see the comment in
1244                  * that function for the gory details. -acme
1245                  */
1246
1247                 /* It is a tricky place. Until this moment the IPv4 tcp
1248                    code worked with the IPv6 icsk.icsk_af_ops.
1249                    Sync it now.
1250                  */
1251                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1252
1253                 return newsk;
1254         }
1255
1256         ireq = inet_rsk(req);
1257
1258         if (sk_acceptq_is_full(sk))
1259                 goto out_overflow;
1260
1261         if (!dst) {
1262                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1263                 if (!dst)
1264                         goto out;
1265         }
1266
1267         newsk = tcp_create_openreq_child(sk, req, skb);
1268         if (!newsk)
1269                 goto out_nonewsk;
1270
1271         /*
1272          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1273          * count here, tcp_create_openreq_child now does this for us, see the
1274          * comment in that function for the gory details. -acme
1275          */
1276
1277         newsk->sk_gso_type = SKB_GSO_TCPV6;
1278         ip6_dst_store(newsk, dst, NULL, NULL);
1279         inet6_sk_rx_dst_set(newsk, skb);
1280
1281         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1282
1283         newtp = tcp_sk(newsk);
1284         newinet = inet_sk(newsk);
1285         newnp = tcp_inet6_sk(newsk);
1286
1287         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1288
1289         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1290         newnp->saddr = ireq->ir_v6_loc_addr;
1291         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1292         newsk->sk_bound_dev_if = ireq->ir_iif;
1293
1294         /* Now IPv6 options...
1295
1296            First: no IPv4 options.
1297          */
1298         newinet->inet_opt = NULL;
1299         newnp->ipv6_mc_list = NULL;
1300         newnp->ipv6_ac_list = NULL;
1301         newnp->ipv6_fl_list = NULL;
1302
1303         /* Clone RX bits */
1304         newnp->rxopt.all = np->rxopt.all;
1305
1306         newnp->pktoptions = NULL;
1307         newnp->opt        = NULL;
1308         newnp->mcast_oif  = tcp_v6_iif(skb);
1309         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1310         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1311         if (np->repflow)
1312                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1313
1314         /* Set the ToS of the new socket based upon the value of the
1315          * incoming SYN. ECT bits are set later in tcp_init_transfer().
1316          */
1317         if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1318                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1319
1320         /* Clone native IPv6 options from listening socket (if any)
1321
1322            Yes, keeping a reference count would be much more clever,
1323            but we do one more thing here: we reattach optmem
1324            to newsk.
1325          */
1326         opt = ireq->ipv6_opt;
1327         if (!opt)
1328                 opt = rcu_dereference(np->opt);
1329         if (opt) {
1330                 opt = ipv6_dup_options(newsk, opt);
1331                 RCU_INIT_POINTER(newnp->opt, opt);
1332         }
1333         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1334         if (opt)
1335                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1336                                                     opt->opt_flen;
1337
1338         tcp_ca_openreq_child(newsk, dst);
1339
1340         tcp_sync_mss(newsk, dst_mtu(dst));
1341         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1342
1343         tcp_initialize_rcv_mss(newsk);
1344
1345         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1346         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1347
1348 #ifdef CONFIG_TCP_MD5SIG
1349         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1350
1351         /* Copy over the MD5 key from the original socket */
1352         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1353         if (key) {
1354                 /* We're using one, so create a matching key
1355                  * on the newsk structure. If we fail to get
1356                  * memory, then we end up not copying the key
1357                  * across. Shucks.
1358                  */
1359                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1360                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1361                                sk_gfp_mask(sk, GFP_ATOMIC));
1362         }
1363 #endif
1364
1365         if (__inet_inherit_port(sk, newsk) < 0) {
1366                 inet_csk_prepare_forced_close(newsk);
1367                 tcp_done(newsk);
1368                 goto out;
1369         }
1370         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1371                                        &found_dup_sk);
1372         if (*own_req) {
1373                 tcp_move_syn(newtp, req);
1374
1375                 /* Clone pktoptions received with SYN, if we own the req */
1376                 if (ireq->pktopts) {
1377                         newnp->pktoptions = skb_clone(ireq->pktopts,
1378                                                       sk_gfp_mask(sk, GFP_ATOMIC));
1379                         consume_skb(ireq->pktopts);
1380                         ireq->pktopts = NULL;
1381                         if (newnp->pktoptions) {
1382                                 tcp_v6_restore_cb(newnp->pktoptions);
1383                                 skb_set_owner_r(newnp->pktoptions, newsk);
1384                         }
1385                 }
1386         } else {
1387                 if (!req_unhash && found_dup_sk) {
1388                         /* This code path should only be executed in the
1389                          * syncookie case.
1390                          */
1391                         bh_unlock_sock(newsk);
1392                         sock_put(newsk);
1393                         newsk = NULL;
1394                 }
1395         }
1396
1397         return newsk;
1398
1399 out_overflow:
1400         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1401 out_nonewsk:
1402         dst_release(dst);
1403 out:
1404         tcp_listendrop(sk);
1405         return NULL;
1406 }
1407
1408 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1409                                                            u32));
1410 /* The socket must have its spinlock held when we get
1411  * here, unless it is a TCP_LISTEN socket.
1412  *
1413  * We have a potential double-lock case here, so even when
1414  * doing backlog processing we use the BH locking scheme.
1415  * This is because we cannot sleep with the original spinlock
1416  * held.
1417  */
1418 INDIRECT_CALLABLE_SCOPE
1419 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1420 {
1421         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1422         struct sk_buff *opt_skb = NULL;
1423         enum skb_drop_reason reason;
1424         struct tcp_sock *tp;
1425
1426         /* Imagine: the socket is IPv6. An IPv4 packet arrives,
1427            goes to the IPv4 receive handler and is backlogged.
1428            From the backlog it always goes here. Kerboom...
1429            Fortunately, tcp_rcv_established and rcv_established
1430            handle them correctly, but it is not the case with
1431            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1432          */
1433
1434         if (skb->protocol == htons(ETH_P_IP))
1435                 return tcp_v4_do_rcv(sk, skb);
1436
1437         /*
1438          *      socket locking is here for SMP purposes as backlog rcv
1439          *      is currently called with bh processing disabled.
1440          */
1441
1442         /* Do Stevens' IPV6_PKTOPTIONS.
1443
1444            Yes, guys, it is the only place in our code where we
1445            may make it without affecting IPv4.
1446            The rest of the code is protocol independent,
1447            and I do not like the idea of uglifying IPv4.
1448
1449            Actually, the whole idea behind IPV6_PKTOPTIONS
1450            looks not very well thought out. For now we latch the
1451            options received in the last packet enqueued
1452            by tcp. Feel free to propose a better solution.
1453                                                --ANK (980728)
1454          */
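        /* Clone the skb up front when any rx option is requested; if the
         * clone fails (opt_skb == NULL) the options are simply not
         * reported for this segment.
         */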
1455         if (np->rxopt.all)
1456                 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1457
1458         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1459         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1460                 struct dst_entry *dst;
1461
1462                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1463                                                 lockdep_sock_is_held(sk));
1464
1465                 sock_rps_save_rxhash(sk, skb);
1466                 sk_mark_napi_id(sk, skb);
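                /* Revalidate the cached rx dst: drop it if the incoming
                 * interface changed or the route check fails.  The
                 * INDIRECT_CALL_1() hint names ip6_dst_check() as the
                 * likely target so retpoline builds can avoid an
                 * indirect call.
                 */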
1467                 if (dst) {
1468                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1469                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1470                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1471                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1472                                 dst_release(dst);
1473                         }
1474                 }
1475
1476                 tcp_rcv_established(sk, skb);
1477                 if (opt_skb)
1478                         goto ipv6_pktoptions;
1479                 return 0;
1480         }
1481
1482         if (tcp_checksum_complete(skb))
1483                 goto csum_err;
1484
1485         if (sk->sk_state == TCP_LISTEN) {
1486                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1487
1488                 if (!nsk)
1489                         goto discard;
1490
1491                 if (nsk != sk) {
1492                         if (tcp_child_process(sk, nsk, skb))
1493                                 goto reset;
1494                         if (opt_skb)
1495                                 __kfree_skb(opt_skb);
1496                         return 0;
1497                 }
1498         } else
1499                 sock_rps_save_rxhash(sk, skb);
1500
1501         if (tcp_rcv_state_process(sk, skb))
1502                 goto reset;
1503         if (opt_skb)
1504                 goto ipv6_pktoptions;
1505         return 0;
1506
1507 reset:
1508         tcp_v6_send_reset(sk, skb);
1509 discard:
1510         if (opt_skb)
1511                 __kfree_skb(opt_skb);
1512         kfree_skb_reason(skb, reason);
1513         return 0;
1514 csum_err:
1515         reason = SKB_DROP_REASON_TCP_CSUM;
1516         trace_tcp_bad_csum(skb);
1517         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1518         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1519         goto discard;
1520
1521
1522 ipv6_pktoptions:
1523         /* You may ask, what is this?
1524
1525            1. skb was enqueued by tcp.
1526            2. skb was added to the tail of the read queue, not out of order.
1527            3. The socket is not in a passive state.
1528            4. Finally, it really contains options the user wants to receive.
1529          */
1530         tp = tcp_sk(sk);
1531         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1532             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1533                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1534                         np->mcast_oif = tcp_v6_iif(opt_skb);
1535                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1536                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1537                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1538                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1539                 if (np->repflow)
1540                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1541                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1542                         skb_set_owner_r(opt_skb, sk);
1543                         tcp_v6_restore_cb(opt_skb);
1544                         opt_skb = xchg(&np->pktoptions, opt_skb);
1545                 } else {
1546                         __kfree_skb(opt_skb);
1547                         opt_skb = xchg(&np->pktoptions, NULL);
1548                 }
1549         }
1550
1551         consume_skb(opt_skb);
1552         return 0;
1553 }
1554
1555 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1556                            const struct tcphdr *th)
1557 {
1558         /* This is tricky: we move IP6CB to its correct location inside
1559          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1560          * _decode_session6() uses IP6CB().
1561          * barrier() makes sure the compiler won't play aliasing games.
1562          */
1563         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1564                 sizeof(struct inet6_skb_parm));
1565         barrier();
1566
1567         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1568         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1569                                     skb->len - th->doff*4);
1570         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1571         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1572         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1573         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1574         TCP_SKB_CB(skb)->sacked = 0;
1575         TCP_SKB_CB(skb)->has_rxtstamp =
1576                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1577 }
1578
1579 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1580 {
1581         enum skb_drop_reason drop_reason;
1582         int sdif = inet6_sdif(skb);
1583         int dif = inet6_iif(skb);
1584         const struct tcphdr *th;
1585         const struct ipv6hdr *hdr;
1586         bool refcounted;
1587         struct sock *sk;
1588         int ret;
1589         struct net *net = dev_net(skb->dev);
1590
1591         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1592         if (skb->pkt_type != PACKET_HOST)
1593                 goto discard_it;
1594
1595         /*
1596          *      Count it even if it's bad.
1597          */
1598         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1599
1600         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1601                 goto discard_it;
1602
1603         th = (const struct tcphdr *)skb->data;
1604
1605         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1606                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1607                 goto bad_packet;
1608         }
1609         if (!pskb_may_pull(skb, th->doff*4))
1610                 goto discard_it;
1611
1612         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1613                 goto csum_error;
1614
1615         th = (const struct tcphdr *)skb->data;
1616         hdr = ipv6_hdr(skb);
1617
1618 lookup:
1619         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1620                                 th->source, th->dest, inet6_iif(skb), sdif,
1621                                 &refcounted);
1622         if (!sk)
1623                 goto no_tcp_socket;
1624
1625 process:
1626         if (sk->sk_state == TCP_TIME_WAIT)
1627                 goto do_time_wait;
1628
1629         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1630                 struct request_sock *req = inet_reqsk(sk);
1631                 bool req_stolen = false;
1632                 struct sock *nsk;
1633
1634                 sk = req->rsk_listener;
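                /* Check TCP-MD5 against the listener before doing any
                 * further work on the request; on failure the drop is
                 * accounted to the listener and the req reference is
                 * released.
                 */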
1635                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1636                                                    &hdr->saddr, &hdr->daddr,
1637                                                    AF_INET6, dif, sdif);
1638                 if (drop_reason) {
1639                         sk_drops_add(sk, skb);
1640                         reqsk_put(req);
1641                         goto discard_it;
1642                 }
1643                 if (tcp_checksum_complete(skb)) {
1644                         reqsk_put(req);
1645                         goto csum_error;
1646                 }
1647                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1648                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1649                         if (!nsk) {
1650                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1651                                 goto lookup;
1652                         }
1653                         sk = nsk;
1654                         /* reuseport_migrate_sock() already holds one
1655                          * sk_refcnt reference on the socket it returns.
1656                          */
1657                 } else {
1658                         sock_hold(sk);
1659                 }
1660                 refcounted = true;
1661                 nsk = NULL;
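                /* tcp_filter() may run a socket filter that trims the skb,
                 * so reload the header pointers before tcp_check_req()
                 * dereferences them.
                 */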
1662                 if (!tcp_filter(sk, skb)) {
1663                         th = (const struct tcphdr *)skb->data;
1664                         hdr = ipv6_hdr(skb);
1665                         tcp_v6_fill_cb(skb, hdr, th);
1666                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1667                 } else {
1668                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1669                 }
1670                 if (!nsk) {
1671                         reqsk_put(req);
1672                         if (req_stolen) {
1673                         /* Another CPU got exclusive access to req
1674                          * and created a full-blown socket.
1675                          * Try to feed this packet to that socket
1676                          * instead of discarding it.
1677                          */
1678                                 tcp_v6_restore_cb(skb);
1679                                 sock_put(sk);
1680                                 goto lookup;
1681                         }
1682                         goto discard_and_relse;
1683                 }
1684                 if (nsk == sk) {
1685                         reqsk_put(req);
1686                         tcp_v6_restore_cb(skb);
1687                 } else if (tcp_child_process(sk, nsk, skb)) {
1688                         tcp_v6_send_reset(nsk, skb);
1689                         goto discard_and_relse;
1690                 } else {
1691                         sock_put(sk);
1692                         return 0;
1693                 }
1694         }
1695
1696         if (static_branch_unlikely(&ip6_min_hopcount)) {
1697                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1698                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1699                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1700                         goto discard_and_relse;
1701                 }
1702         }
1703
1704         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1705                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1706                 goto discard_and_relse;
1707         }
1708
1709         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1710                                            AF_INET6, dif, sdif);
1711         if (drop_reason)
1712                 goto discard_and_relse;
1713
1714         if (tcp_filter(sk, skb)) {
1715                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1716                 goto discard_and_relse;
1717         }
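        /* Same as above: reload th/hdr, since tcp_filter() may have
         * modified the skb.
         */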
1718         th = (const struct tcphdr *)skb->data;
1719         hdr = ipv6_hdr(skb);
1720         tcp_v6_fill_cb(skb, hdr, th);
1721
1722         skb->dev = NULL;
1723
1724         if (sk->sk_state == TCP_LISTEN) {
1725                 ret = tcp_v6_do_rcv(sk, skb);
1726                 goto put_and_return;
1727         }
1728
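        /* Not a listener: process under the socket spinlock, or queue to
         * the backlog when the socket is owned by user context; backlogged
         * skbs are later replayed through tcp_v6_do_rcv().
         */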
1729         sk_incoming_cpu_update(sk);
1730
1731         bh_lock_sock_nested(sk);
1732         tcp_segs_in(tcp_sk(sk), skb);
1733         ret = 0;
1734         if (!sock_owned_by_user(sk)) {
1735                 ret = tcp_v6_do_rcv(sk, skb);
1736         } else {
1737                 if (tcp_add_backlog(sk, skb, &drop_reason))
1738                         goto discard_and_relse;
1739         }
1740         bh_unlock_sock(sk);
1741 put_and_return:
1742         if (refcounted)
1743                 sock_put(sk);
1744         return ret ? -1 : 0;
1745
1746 no_tcp_socket:
1747         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1748         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1749                 goto discard_it;
1750
1751         tcp_v6_fill_cb(skb, hdr, th);
1752
1753         if (tcp_checksum_complete(skb)) {
1754 csum_error:
1755                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1756                 trace_tcp_bad_csum(skb);
1757                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1758 bad_packet:
1759                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1760         } else {
1761                 tcp_v6_send_reset(NULL, skb);
1762         }
1763
1764 discard_it:
1765         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1766         kfree_skb_reason(skb, drop_reason);
1767         return 0;
1768
1769 discard_and_relse:
1770         sk_drops_add(sk, skb);
1771         if (refcounted)
1772                 sock_put(sk);
1773         goto discard_it;
1774
1775 do_time_wait:
1776         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1777                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1778                 inet_twsk_put(inet_twsk(sk));
1779                 goto discard_it;
1780         }
1781
1782         tcp_v6_fill_cb(skb, hdr, th);
1783
1784         if (tcp_checksum_complete(skb)) {
1785                 inet_twsk_put(inet_twsk(sk));
1786                 goto csum_error;
1787         }
1788
1789         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1790         case TCP_TW_SYN:
1791         {
1792                 struct sock *sk2;
1793
1794                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1795                                             skb, __tcp_hdrlen(th),
1796                                             &ipv6_hdr(skb)->saddr, th->source,
1797                                             &ipv6_hdr(skb)->daddr,
1798                                             ntohs(th->dest),
1799                                             tcp_v6_iif_l3_slave(skb),
1800                                             sdif);
1801                 if (sk2) {
1802                         struct inet_timewait_sock *tw = inet_twsk(sk);
1803                         inet_twsk_deschedule_put(tw);
1804                         sk = sk2;
1805                         tcp_v6_restore_cb(skb);
1806                         refcounted = false;
1807                         goto process;
1808                 }
1809         }
1810                 /* to ACK */
1811                 fallthrough;
1812         case TCP_TW_ACK:
1813                 tcp_v6_timewait_ack(sk, skb);
1814                 break;
1815         case TCP_TW_RST:
1816                 tcp_v6_send_reset(sk, skb);
1817                 inet_twsk_deschedule_put(inet_twsk(sk));
1818                 goto discard_it;
1819         case TCP_TW_SUCCESS:
1820                 ;
1821         }
1822         goto discard_it;
1823 }
1824
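/* Early demux: while the packet is still in the IPv6 receive path, try to
 * find an established socket and attach it (plus a validated cached dst)
 * to the skb, so that tcp_v6_rcv() can skip a second socket lookup.
 */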
1825 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1826 {
1827         const struct ipv6hdr *hdr;
1828         const struct tcphdr *th;
1829         struct sock *sk;
1830
1831         if (skb->pkt_type != PACKET_HOST)
1832                 return;
1833
1834         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1835                 return;
1836
1837         hdr = ipv6_hdr(skb);
1838         th = tcp_hdr(skb);
1839
1840         if (th->doff < sizeof(struct tcphdr) / 4)
1841                 return;
1842
1843         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1844         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1845                                         &hdr->saddr, th->source,
1846                                         &hdr->daddr, ntohs(th->dest),
1847                                         inet6_iif(skb), inet6_sdif(skb));
1848         if (sk) {
1849                 skb->sk = sk;
1850                 skb->destructor = sock_edemux;
1851                 if (sk_fullsock(sk)) {
1852                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1853
1854                         if (dst)
1855                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1856                         if (dst &&
1857                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1858                                 skb_dst_set_noref(skb, dst);
1859                 }
1860         }
1861 }
1862
1863 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1864         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1865         .twsk_unique    = tcp_twsk_unique,
1866         .twsk_destructor = tcp_twsk_destructor,
1867 };
1868
1869 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1870 {
1871         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1872 }
1873
1874 const struct inet_connection_sock_af_ops ipv6_specific = {
1875         .queue_xmit        = inet6_csk_xmit,
1876         .send_check        = tcp_v6_send_check,
1877         .rebuild_header    = inet6_sk_rebuild_header,
1878         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1879         .conn_request      = tcp_v6_conn_request,
1880         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1881         .net_header_len    = sizeof(struct ipv6hdr),
1882         .net_frag_header_len = sizeof(struct frag_hdr),
1883         .setsockopt        = ipv6_setsockopt,
1884         .getsockopt        = ipv6_getsockopt,
1885         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1886         .sockaddr_len      = sizeof(struct sockaddr_in6),
1887         .mtu_reduced       = tcp_v6_mtu_reduced,
1888 };
1889
1890 #ifdef CONFIG_TCP_MD5SIG
1891 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1892         .md5_lookup     =       tcp_v6_md5_lookup,
1893         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1894         .md5_parse      =       tcp_v6_parse_md5_keys,
1895 };
1896 #endif
1897
1898 /*
1899  *      TCP over IPv4 via INET6 API
1900  */
1901 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1902         .queue_xmit        = ip_queue_xmit,
1903         .send_check        = tcp_v4_send_check,
1904         .rebuild_header    = inet_sk_rebuild_header,
1905         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1906         .conn_request      = tcp_v6_conn_request,
1907         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1908         .net_header_len    = sizeof(struct iphdr),
1909         .setsockopt        = ipv6_setsockopt,
1910         .getsockopt        = ipv6_getsockopt,
1911         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1912         .sockaddr_len      = sizeof(struct sockaddr_in6),
1913         .mtu_reduced       = tcp_v4_mtu_reduced,
1914 };
1915
1916 #ifdef CONFIG_TCP_MD5SIG
1917 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1918         .md5_lookup     =       tcp_v4_md5_lookup,
1919         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1920         .md5_parse      =       tcp_v6_parse_md5_keys,
1921 };
1922 #endif
1923
1924 /* NOTE: a lot of things are set to zero explicitly by the call to
1925  *       sk_alloc(), so they need not be done here.
1926  */
1927 static int tcp_v6_init_sock(struct sock *sk)
1928 {
1929         struct inet_connection_sock *icsk = inet_csk(sk);
1930
1931         tcp_init_sock(sk);
1932
1933         icsk->icsk_af_ops = &ipv6_specific;
1934
1935 #ifdef CONFIG_TCP_MD5SIG
1936         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1937 #endif
1938
1939         return 0;
1940 }
1941
1942 static void tcp_v6_destroy_sock(struct sock *sk)
1943 {
1944         tcp_v4_destroy_sock(sk);
1945         inet6_destroy_sock(sk);
1946 }
1947
1948 #ifdef CONFIG_PROC_FS
1949 /* Proc filesystem TCPv6 sock list dumping. */
1950 static void get_openreq6(struct seq_file *seq,
1951                          const struct request_sock *req, int i)
1952 {
1953         long ttd = req->rsk_timer.expires - jiffies;
1954         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1955         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1956
1957         if (ttd < 0)
1958                 ttd = 0;
1959
1960         seq_printf(seq,
1961                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1962                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1963                    i,
1964                    src->s6_addr32[0], src->s6_addr32[1],
1965                    src->s6_addr32[2], src->s6_addr32[3],
1966                    inet_rsk(req)->ir_num,
1967                    dest->s6_addr32[0], dest->s6_addr32[1],
1968                    dest->s6_addr32[2], dest->s6_addr32[3],
1969                    ntohs(inet_rsk(req)->ir_rmt_port),
1970                    TCP_SYN_RECV,
1971                    0, 0, /* could print option size, but that is af dependent. */
1972                    1,   /* timers active (only the expire timer) */
1973                    jiffies_to_clock_t(ttd),
1974                    req->num_timeout,
1975                    from_kuid_munged(seq_user_ns(seq),
1976                                     sock_i_uid(req->rsk_listener)),
1977                    0,  /* non standard timer */
1978                    0, /* open_requests have no inode */
1979                    0, req);
1980 }
1981
1982 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1983 {
1984         const struct in6_addr *dest, *src;
1985         __u16 destp, srcp;
1986         int timer_active;
1987         unsigned long timer_expires;
1988         const struct inet_sock *inet = inet_sk(sp);
1989         const struct tcp_sock *tp = tcp_sk(sp);
1990         const struct inet_connection_sock *icsk = inet_csk(sp);
1991         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1992         int rx_queue;
1993         int state;
1994
1995         dest  = &sp->sk_v6_daddr;
1996         src   = &sp->sk_v6_rcv_saddr;
1997         destp = ntohs(inet->inet_dport);
1998         srcp  = ntohs(inet->inet_sport);
1999
2000         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2001             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2002             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2003                 timer_active    = 1;
2004                 timer_expires   = icsk->icsk_timeout;
2005         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2006                 timer_active    = 4;
2007                 timer_expires   = icsk->icsk_timeout;
2008         } else if (timer_pending(&sp->sk_timer)) {
2009                 timer_active    = 2;
2010                 timer_expires   = sp->sk_timer.expires;
2011         } else {
2012                 timer_active    = 0;
2013                 timer_expires = jiffies;
2014         }
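        /* timer_active encodes the "tr" column of /proc/net/tcp6:
         * 1 retransmit/loss-probe, 2 keepalive (sk_timer), 4 zero-window
         * probe, 0 none (3 is used for timewait sockets, see
         * get_timewait6_sock()).
         */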
2015
2016         state = inet_sk_state_load(sp);
2017         if (state == TCP_LISTEN)
2018                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2019         else
2020                 /* Because we don't lock the socket,
2021                  * we might find a transient negative value.
2022                  */
2023                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2024                                       READ_ONCE(tp->copied_seq), 0);
2025
2026         seq_printf(seq,
2027                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2028                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2029                    i,
2030                    src->s6_addr32[0], src->s6_addr32[1],
2031                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2032                    dest->s6_addr32[0], dest->s6_addr32[1],
2033                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2034                    state,
2035                    READ_ONCE(tp->write_seq) - tp->snd_una,
2036                    rx_queue,
2037                    timer_active,
2038                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2039                    icsk->icsk_retransmits,
2040                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2041                    icsk->icsk_probes_out,
2042                    sock_i_ino(sp),
2043                    refcount_read(&sp->sk_refcnt), sp,
2044                    jiffies_to_clock_t(icsk->icsk_rto),
2045                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2046                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2047                    tcp_snd_cwnd(tp),
2048                    state == TCP_LISTEN ?
2049                         fastopenq->max_qlen :
2050                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2051                    );
2052 }
2053
2054 static void get_timewait6_sock(struct seq_file *seq,
2055                                struct inet_timewait_sock *tw, int i)
2056 {
2057         long delta = tw->tw_timer.expires - jiffies;
2058         const struct in6_addr *dest, *src;
2059         __u16 destp, srcp;
2060
2061         dest = &tw->tw_v6_daddr;
2062         src  = &tw->tw_v6_rcv_saddr;
2063         destp = ntohs(tw->tw_dport);
2064         srcp  = ntohs(tw->tw_sport);
2065
2066         seq_printf(seq,
2067                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2068                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2069                    i,
2070                    src->s6_addr32[0], src->s6_addr32[1],
2071                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2072                    dest->s6_addr32[0], dest->s6_addr32[1],
2073                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2074                    tw->tw_substate, 0, 0,
2075                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2076                    refcount_read(&tw->tw_refcnt), tw);
2077 }
2078
2079 static int tcp6_seq_show(struct seq_file *seq, void *v)
2080 {
2081         struct tcp_iter_state *st;
2082         struct sock *sk = v;
2083
2084         if (v == SEQ_START_TOKEN) {
2085                 seq_puts(seq,
2086                          "  sl  "
2087                          "local_address                         "
2088                          "remote_address                        "
2089                          "st tx_queue rx_queue tr tm->when retrnsmt"
2090                          "   uid  timeout inode\n");
2091                 goto out;
2092         }
2093         st = seq->private;
2094
2095         if (sk->sk_state == TCP_TIME_WAIT)
2096                 get_timewait6_sock(seq, v, st->num);
2097         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2098                 get_openreq6(seq, v, st->num);
2099         else
2100                 get_tcp6_sock(seq, v, st->num);
2101 out:
2102         return 0;
2103 }
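/* A minimal userspace sketch (hypothetical, not part of the kernel) of how
 * the lines emitted above can be consumed.  It only picks out the address,
 * port and state fields from the seq_printf() formats used by
 * get_tcp6_sock()/get_openreq6()/get_timewait6_sock():
 *
 *	FILE *f = fopen("/proc/net/tcp6", "r");
 *	char line[512];
 *
 *	if (f && fgets(line, sizeof(line), f)) {	// skip the header line
 *		while (fgets(line, sizeof(line), f)) {
 *			unsigned int sl, lport, rport, st;
 *			char laddr[33], raddr[33];	// 32 hex chars + NUL
 *
 *			if (sscanf(line, " %u: %32[0-9A-F]:%X %32[0-9A-F]:%X %X",
 *				   &sl, laddr, &lport, raddr, &rport, &st) == 6)
 *				printf("%s:%u -> %s:%u st=%02X\n",
 *				       laddr, lport, raddr, rport, st);
 *		}
 *		fclose(f);
 *	}
 */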
2104
2105 static const struct seq_operations tcp6_seq_ops = {
2106         .show           = tcp6_seq_show,
2107         .start          = tcp_seq_start,
2108         .next           = tcp_seq_next,
2109         .stop           = tcp_seq_stop,
2110 };
2111
2112 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2113         .family         = AF_INET6,
2114 };
2115
2116 int __net_init tcp6_proc_init(struct net *net)
2117 {
2118         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2119                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2120                 return -ENOMEM;
2121         return 0;
2122 }
2123
2124 void tcp6_proc_exit(struct net *net)
2125 {
2126         remove_proc_entry("tcp6", net->proc_net);
2127 }
2128 #endif
2129
2130 struct proto tcpv6_prot = {
2131         .name                   = "TCPv6",
2132         .owner                  = THIS_MODULE,
2133         .close                  = tcp_close,
2134         .pre_connect            = tcp_v6_pre_connect,
2135         .connect                = tcp_v6_connect,
2136         .disconnect             = tcp_disconnect,
2137         .accept                 = inet_csk_accept,
2138         .ioctl                  = tcp_ioctl,
2139         .init                   = tcp_v6_init_sock,
2140         .destroy                = tcp_v6_destroy_sock,
2141         .shutdown               = tcp_shutdown,
2142         .setsockopt             = tcp_setsockopt,
2143         .getsockopt             = tcp_getsockopt,
2144         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2145         .keepalive              = tcp_set_keepalive,
2146         .recvmsg                = tcp_recvmsg,
2147         .sendmsg                = tcp_sendmsg,
2148         .sendpage               = tcp_sendpage,
2149         .backlog_rcv            = tcp_v6_do_rcv,
2150         .release_cb             = tcp_release_cb,
2151         .hash                   = inet6_hash,
2152         .unhash                 = inet_unhash,
2153         .get_port               = inet_csk_get_port,
2154         .put_port               = inet_put_port,
2155 #ifdef CONFIG_BPF_SYSCALL
2156         .psock_update_sk_prot   = tcp_bpf_update_proto,
2157 #endif
2158         .enter_memory_pressure  = tcp_enter_memory_pressure,
2159         .leave_memory_pressure  = tcp_leave_memory_pressure,
2160         .stream_memory_free     = tcp_stream_memory_free,
2161         .sockets_allocated      = &tcp_sockets_allocated,
2162         .memory_allocated       = &tcp_memory_allocated,
2163         .memory_pressure        = &tcp_memory_pressure,
2164         .orphan_count           = &tcp_orphan_count,
2165         .sysctl_mem             = sysctl_tcp_mem,
2166         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2167         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2168         .max_header             = MAX_TCP_HEADER,
2169         .obj_size               = sizeof(struct tcp6_sock),
2170         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2171         .twsk_prot              = &tcp6_timewait_sock_ops,
2172         .rsk_prot               = &tcp6_request_sock_ops,
2173         .h.hashinfo             = &tcp_hashinfo,
2174         .no_autobind            = true,
2175         .diag_destroy           = tcp_abort,
2176 };
2177 EXPORT_SYMBOL_GPL(tcpv6_prot);
2178
2179 /* thinking of making this const? Don't.
2180  * early_demux can change based on sysctl.
2181  */
2182 static struct inet6_protocol tcpv6_protocol = {
2183         .early_demux    =       tcp_v6_early_demux,
2184         .early_demux_handler =  tcp_v6_early_demux,
2185         .handler        =       tcp_v6_rcv,
2186         .err_handler    =       tcp_v6_err,
2187         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2188 };
2189
2190 static struct inet_protosw tcpv6_protosw = {
2191         .type           =       SOCK_STREAM,
2192         .protocol       =       IPPROTO_TCP,
2193         .prot           =       &tcpv6_prot,
2194         .ops            =       &inet6_stream_ops,
2195         .flags          =       INET_PROTOSW_PERMANENT |
2196                                 INET_PROTOSW_ICSK,
2197 };
2198
2199 static int __net_init tcpv6_net_init(struct net *net)
2200 {
2201         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2202                                     SOCK_RAW, IPPROTO_TCP, net);
2203 }
2204
2205 static void __net_exit tcpv6_net_exit(struct net *net)
2206 {
2207         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2208 }
2209
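/* Batched netns exit: purge any remaining IPv6 timewait sockets, whose
 * timers could otherwise outlive the namespaces being torn down.
 */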
2210 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2211 {
2212         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2213 }
2214
2215 static struct pernet_operations tcpv6_net_ops = {
2216         .init       = tcpv6_net_init,
2217         .exit       = tcpv6_net_exit,
2218         .exit_batch = tcpv6_net_exit_batch,
2219 };
2220
2221 int __init tcpv6_init(void)
2222 {
2223         int ret;
2224
2225         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2226         if (ret)
2227                 goto out;
2228
2229         /* register inet6 protocol */
2230         ret = inet6_register_protosw(&tcpv6_protosw);
2231         if (ret)
2232                 goto out_tcpv6_protocol;
2233
2234         ret = register_pernet_subsys(&tcpv6_net_ops);
2235         if (ret)
2236                 goto out_tcpv6_protosw;
2237
2238         ret = mptcpv6_init();
2239         if (ret)
2240                 goto out_tcpv6_pernet_subsys;
2241
2242 out:
2243         return ret;
2244
2245 out_tcpv6_pernet_subsys:
2246         unregister_pernet_subsys(&tcpv6_net_ops);
2247 out_tcpv6_protosw:
2248         inet6_unregister_protosw(&tcpv6_protosw);
2249 out_tcpv6_protocol:
2250         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2251         goto out;
2252 }
2253
2254 void tcpv6_exit(void)
2255 {
2256         unregister_pernet_subsys(&tcpv6_net_ops);
2257         inet6_unregister_protosw(&tcpv6_protosw);
2258         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2259 }