/* GNU Linux-libre 5.15.137-gnu - net/ipv6/tcp_ipv6.c */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
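
/* Layout sketch behind the pointer arithmetic above: struct tcp6_sock
 * places the ipv6_pinfo at its tail, so the offset from the socket base
 * is a compile-time constant:
 *
 *      struct tcp6_sock {
 *              struct tcp_sock   tcp;
 *              struct ipv6_pinfo inet6;   // sk + offset points here
 *      };
 */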

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
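
/* Note on the hook above: BPF_CGROUP_RUN_PROG_INET6_CONNECT() may rewrite
 * the destination address and port in uaddr before tcp_v6_connect() ever
 * sees them, which is why the addr_len bound has to be validated here
 * first.
 */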

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct ipv6_txoptions *opt;
        struct flowi6 fl6;
        struct dst_entry *dst;
        int addr_type;
        int err;
        struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                icsk->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        icsk->icsk_af_ops = &ipv6_specific;
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        if (!saddr) {
                saddr = &fl6.saddr;
                sk->sk_v6_rcv_saddr = *saddr;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
                                                   np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}
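
/* Worked example for the mss_clamp set above: IPV6_MIN_MTU is 1280; minus
 * the 20-byte TCP header and the 40-byte IPv6 header, that leaves an MSS
 * of 1220. This is the conservative clamp used until the route's real
 * PMTU is known; extension headers (icsk_ext_hdr_len) shrink the usable
 * payload further.
 */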

static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}
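
/* Numeric sketch of the clamp above, ignoring TCP options and extension
 * headers: an ICMPV6_PKT_TOOBIG reporting mtu = 1400 gives
 * tcp_mtu_to_mss() roughly 1400 - 40 (IPv6) - 20 (TCP) = 1340; with a
 * current mss_cache of 1440 the socket resyncs its MSS and retransmits,
 * while a reported mtu of 1500 on the same socket would be ignored.
 */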

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, &tcp_hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
                __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                goto out;
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else {
                        sk->sk_err_soft = err;
                }
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check whether this ICMP message allows revert of backoff.
                 * (see RFC 6069)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else {
                sk->sk_err_soft = err;
        }

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}
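
/* Example of the tclass reflection above (sysctl_tcp_reflect_tos set):
 * a SYN arriving with traffic class 0xb9 (DSCP EF plus ECT(1)) on a
 * listener whose np->tclass is 0x00 yields a SYN-ACK tclass of
 * (0xb9 & ~0x03) | (0x00 & 0x03) = 0xb8: the peer's DSCP is echoed while
 * the listener keeps its own ECN bits.
 */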

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
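
/* Userspace sketch for the option parsed above (hypothetical fd and key,
 * for illustration only):
 *
 *      struct tcp_md5sig md5 = {};
 *      struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *      a->sin6_family = AF_INET6;
 *      inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *      md5.tcpm_keylen = 6;
 *      memcpy(md5.tcpm_key, "secret", 6);
 *      setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing tcpm_keylen == 0 for the same address deletes the key, per the
 * !cmd.tcpm_keylen branch above.
 */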

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}
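
/* Wire layout of the pseudo-header hashed above, per RFC 2460 section 8.1:
 * saddr (16 bytes) | daddr (16 bytes) | upper-layer length (4 bytes) |
 * 3 zero bytes + next header (IPPROTO_TCP = 6); struct tcp6_pseudohdr
 * mirrors this field order, and cpu_to_be32(IPPROTO_TCP) produces exactly
 * the three zero bytes followed by 0x06.
 */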

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for established and request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
                                    const struct sk_buff *skb,
                                    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        struct tcp_md5sig_key *hash_expected;
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        const struct tcphdr *th = tcp_hdr(skb);
        int genhash, l3index;
        u8 newhash[16];

        /* sdif set means the packet ingressed via a device
         * in an L3 domain and dif is set to the l3mdev
         */
        l3index = sdif ? dif : 0;

        hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
        hash_location = tcp_parse_md5sig_option(th);

        /* We've parsed the options - do we have a hash? */
        if (!hash_expected && !hash_location)
                return false;

        if (hash_expected && !hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
                return true;
        }

        if (!hash_expected && hash_location) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
                return true;
        }

        /* check the signature */
        genhash = tcp_v6_md5_hash_skb(newhash,
                                      hash_expected,
                                      NULL, skb);

        if (genhash || memcmp(hash_location, newhash, 16) != 0) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
                net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
                                     genhash ? "failed" : "mismatch",
                                     &ip6h->saddr, ntohs(th->source),
                                     &ip6h->daddr, ntohs(th->dest), l3index);
                return true;
        }
#endif
        return false;
}
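
/* Decision table implemented above (a true return drops the segment):
 *
 *      key configured | MD5 option in segment | result
 *      no             | no                    | accept
 *      yes            | no                    | drop, TCPMD5NOTFOUND
 *      no             | yes                   | drop, TCPMD5UNEXPECTED
 *      yes            | yes                   | verify; drop on mismatch,
 *                                               TCPMD5FAILURE
 */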

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;
        buff->csum = 0;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT) {
                        mark = inet_twsk(sk)->tw_mark;
                        /* autoflowlabel relies on buff->hash */
                        skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
                                     PKT_HASH_TYPE_L4);
                } else {
                        mark = sk->sk_mark;
                }
                buff->tstamp = tcp_transmit_time(sk);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass the full socket to ip6_dst_lookup_flow() unless the reply is
         * sent on behalf of a timewait socket; the underlying function uses
         * it to retrieve the network namespace and, for a full socket, its
         * xfrm policy.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL);
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our
         * dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /* The active side is lost. Try to find the listening socket
                 * through the source port, then find the md5 key through the
                 * listening socket. We do not lose security here: the
                 * incoming packet is checked against the md5 hash with the
                 * found key, and no RST is generated if the md5 hash doesn't
                 * match.
                 */
                sk1 = inet6_lookup_listener(net,
                                           &tcp_hashinfo, NULL, 0,
                                           &ipv6h->saddr,
                                           th->source, &ipv6h->daddr,
                                           ntohs(th->source), dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}
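
/* The seq/ack choice above follows RFC 793's reset generation rules: if
 * the offending segment carried an ACK, the RST takes its sequence number
 * from that ack_seq and needs no ACK bit of its own; otherwise the RST
 * has seq 0 and acknowledges everything the peer sent. E.g. for a bare
 * SYN with sequence S and no payload, ack_seq = S + 1, since the SYN flag
 * occupies one sequence number.
 */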

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}
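
/* Syncookie sketch (CONFIG_SYN_COOKIES): rather than queueing request
 * state, the listener encodes a keyed hash of the 4-tuple plus a coarse
 * timestamp and an index approximating the peer's MSS into the ISN
 * returned via *cookie; cookie_v6_check() above later recovers and
 * validates those fields from the ACK's acknowledgment number.
 * tcp_synq_overflow() records the overflow time so stale cookies can be
 * rejected.
 */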
1205
1206 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1207 {
1208         if (skb->protocol == htons(ETH_P_IP))
1209                 return tcp_v4_conn_request(sk, skb);
1210
1211         if (!ipv6_unicast_destination(skb))
1212                 goto drop;
1213
1214         if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1215                 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1216                 return 0;
1217         }
1218
1219         return tcp_conn_request(&tcp6_request_sock_ops,
1220                                 &tcp_request_sock_ipv6_ops, sk, skb);
1221
1222 drop:
1223         tcp_listendrop(sk);
1224         return 0; /* don't send reset */
1225 }
1226
1227 static void tcp_v6_restore_cb(struct sk_buff *skb)
1228 {
1229         /* We need to move header back to the beginning if xfrm6_policy_check()
1230          * and tcp_v6_fill_cb() are going to be called again.
1231          * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1232          */
1233         memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1234                 sizeof(struct inet6_skb_parm));
1235 }
1236
1237 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1238                                          struct request_sock *req,
1239                                          struct dst_entry *dst,
1240                                          struct request_sock *req_unhash,
1241                                          bool *own_req)
1242 {
1243         struct inet_request_sock *ireq;
1244         struct ipv6_pinfo *newnp;
1245         const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1246         struct ipv6_txoptions *opt;
1247         struct inet_sock *newinet;
1248         bool found_dup_sk = false;
1249         struct tcp_sock *newtp;
1250         struct sock *newsk;
1251 #ifdef CONFIG_TCP_MD5SIG
1252         struct tcp_md5sig_key *key;
1253         int l3index;
1254 #endif
1255         struct flowi6 fl6;
1256
1257         if (skb->protocol == htons(ETH_P_IP)) {
1258                 /*
1259                  *      v6 mapped
1260                  */
1261
1262                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1263                                              req_unhash, own_req);
1264
1265                 if (!newsk)
1266                         return NULL;
1267
1268                 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1269
1270                 newinet = inet_sk(newsk);
1271                 newnp = tcp_inet6_sk(newsk);
1272                 newtp = tcp_sk(newsk);
1273
1274                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1275
1276                 newnp->saddr = newsk->sk_v6_rcv_saddr;
1277
1278                 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1279                 if (sk_is_mptcp(newsk))
1280                         mptcpv6_handle_mapped(newsk, true);
1281                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1282 #ifdef CONFIG_TCP_MD5SIG
1283                 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1284 #endif
1285
1286                 newnp->ipv6_mc_list = NULL;
1287                 newnp->ipv6_ac_list = NULL;
1288                 newnp->ipv6_fl_list = NULL;
1289                 newnp->pktoptions  = NULL;
1290                 newnp->opt         = NULL;
1291                 newnp->mcast_oif   = inet_iif(skb);
1292                 newnp->mcast_hops  = ip_hdr(skb)->ttl;
1293                 newnp->rcv_flowinfo = 0;
1294                 if (np->repflow)
1295                         newnp->flow_label = 0;
1296
1297                 /*
1298                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1299                  * here, tcp_create_openreq_child now does this for us, see the comment in
1300                  * that function for the gory details. -acme
1301                  */
1302
1303                 /* It is tricky place. Until this moment IPv4 tcp
1304                    worked with IPv6 icsk.icsk_af_ops.
1305                    Sync it now.
1306                  */
1307                 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1308
1309                 return newsk;
1310         }
1311
1312         ireq = inet_rsk(req);
1313
1314         if (sk_acceptq_is_full(sk))
1315                 goto out_overflow;
1316
1317         if (!dst) {
1318                 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1319                 if (!dst)
1320                         goto out;
1321         }
1322
1323         newsk = tcp_create_openreq_child(sk, req, skb);
1324         if (!newsk)
1325                 goto out_nonewsk;
1326
1327         /*
1328          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1329          * count here, tcp_create_openreq_child now does this for us, see the
1330          * comment in that function for the gory details. -acme
1331          */
1332
1333         newsk->sk_gso_type = SKB_GSO_TCPV6;
1334         ip6_dst_store(newsk, dst, NULL, NULL);
1335         inet6_sk_rx_dst_set(newsk, skb);
1336
1337         inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1338
1339         newtp = tcp_sk(newsk);
1340         newinet = inet_sk(newsk);
1341         newnp = tcp_inet6_sk(newsk);
1342
1343         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1344
1345         newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1346         newnp->saddr = ireq->ir_v6_loc_addr;
1347         newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1348         newsk->sk_bound_dev_if = ireq->ir_iif;
1349
1350         /* Now IPv6 options...
1351
1352            First: no IPv4 options.
1353          */
1354         newinet->inet_opt = NULL;
1355         newnp->ipv6_mc_list = NULL;
1356         newnp->ipv6_ac_list = NULL;
1357         newnp->ipv6_fl_list = NULL;
1358
1359         /* Clone RX bits */
1360         newnp->rxopt.all = np->rxopt.all;
1361
1362         newnp->pktoptions = NULL;
1363         newnp->opt        = NULL;
1364         newnp->mcast_oif  = tcp_v6_iif(skb);
1365         newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1366         newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1367         if (np->repflow)
1368                 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1369
1370         /* Set ToS of the new socket based upon the value of incoming SYN.
1371          * ECT bits are set later in tcp_init_transfer().
1372          */
1373         if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1374                 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1375
1376         /* Clone native IPv6 options from listening socket (if any)
1377
1378            Yes, keeping reference count would be much more clever,
1379            but we make one more one thing there: reattach optmem
1380            to newsk.
1381          */
1382         opt = ireq->ipv6_opt;
1383         if (!opt)
1384                 opt = rcu_dereference(np->opt);
1385         if (opt) {
1386                 opt = ipv6_dup_options(newsk, opt);
1387                 RCU_INIT_POINTER(newnp->opt, opt);
1388         }
1389         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1390         if (opt)
1391                 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1392                                                     opt->opt_flen;
1393
1394         tcp_ca_openreq_child(newsk, dst);
1395
1396         tcp_sync_mss(newsk, dst_mtu(dst));
1397         newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1398
1399         tcp_initialize_rcv_mss(newsk);
1400
1401         newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1402         newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1403
1404 #ifdef CONFIG_TCP_MD5SIG
1405         l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1406
1407         /* Copy over the MD5 key from the original socket */
1408         key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1409         if (key) {
1410                 /* We're using one, so create a matching key
1411                  * on the newsk structure. If we fail to get
1412                  * memory, then we end up not copying the key
1413                  * across. Shucks.
1414                  */
1415                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1416                                AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
1417                                sk_gfp_mask(sk, GFP_ATOMIC));
1418         }
1419 #endif
1420
1421         if (__inet_inherit_port(sk, newsk) < 0) {
1422                 inet_csk_prepare_forced_close(newsk);
1423                 tcp_done(newsk);
1424                 goto out;
1425         }
1426         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1427                                        &found_dup_sk);
1428         if (*own_req) {
1429                 tcp_move_syn(newtp, req);
1430
1431                 /* Clone pktoptions received with SYN, if we own the req */
1432                 if (ireq->pktopts) {
1433                         newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
1434                         consume_skb(ireq->pktopts);
1435                         ireq->pktopts = NULL;
1436                         if (newnp->pktoptions)
1437                                 tcp_v6_restore_cb(newnp->pktoptions);
1438                 }
1439         } else {
1440                 if (!req_unhash && found_dup_sk) {
1441                         /* This code path should only be executed in the
1442                          * syncookie case only
1443                          */
1444                         bh_unlock_sock(newsk);
1445                         sock_put(newsk);
1446                         newsk = NULL;
1447                 }
1448         }
1449
1450         return newsk;
1451
1452 out_overflow:
1453         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1454 out_nonewsk:
1455         dst_release(dst);
1456 out:
1457         tcp_listendrop(sk);
1458         return NULL;
1459 }
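
/* Illustrative userspace sketch (compiled out, not kernel code): the MD5
 * key inherited by tcp_v6_syn_recv_sock() above is installed on the
 * listening socket with the TCP_MD5SIG socket option (RFC 2385). The
 * peer address and key below are caller-supplied placeholders.
 */
#if 0
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>	/* TCP_MD5SIG, struct tcp_md5sig */
#include <sys/socket.h>

/* Attach an MD5 signing key for one IPv6 peer to a TCP socket. */
static int set_tcp_md5_key(int fd, const struct in6_addr *peer,
			   const void *key, int keylen)
{
	struct tcp_md5sig md5sig;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&md5sig.tcpm_addr;

	if (keylen < 0 || keylen > TCP_MD5SIG_MAXKEYLEN)
		return -1;

	memset(&md5sig, 0, sizeof(md5sig));
	sin6->sin6_family = AF_INET6;
	sin6->sin6_addr = *peer;
	md5sig.tcpm_keylen = keylen;
	memcpy(md5sig.tcpm_key, key, keylen);

	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG,
			  &md5sig, sizeof(md5sig));
}
#endif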
1460
1461 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1462                                                            u32));
1463 /* The socket must have its spinlock held when we get
1464  * here, unless it is a TCP_LISTEN socket.
1465  *
1466  * We have a potential double-lock case here, so even when
1467  * doing backlog processing we use the BH locking scheme.
1468  * This is because we cannot sleep with the original spinlock
1469  * held.
1470  */
1471 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1472 {
1473         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1474         struct sk_buff *opt_skb = NULL;
1475         struct tcp_sock *tp;
1476
1477         /* Imagine: the socket is IPv6. An IPv4 packet arrives, goes
1478            to the IPv4 receive handler and is backlogged. From the
1479            backlog it always ends up here. Kerboom...
1480            Fortunately, tcp_rcv_established and rcv_established
1481            handle such packets correctly, but that is not the case
1482            with tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1483          */
1484
1485         if (skb->protocol == htons(ETH_P_IP))
1486                 return tcp_v4_do_rcv(sk, skb);
1487
1488         /*
1489          *      socket locking is here for SMP purposes as backlog rcv
1490          *      is currently called with bh processing disabled.
1491          */
1492
1493         /* Do Stevens' IPV6_PKTOPTIONS.
1494
1495            Yes, this is the only place in our code where we can do it
1496            without affecting IPv4. The rest of the code is protocol
1497            independent, and I do not like the idea of uglifying IPv4.
1498
1499            Actually, the whole idea behind IPV6_PKTOPTIONS does not
1500            look very well thought out. For now we latch the options
1501            received in the last packet enqueued by tcp; a userspace
1502            sketch of this API follows this function. Feel free to
1503            propose a better solution.
1504                                                --ANK (980728)
1505          */
1506         if (np->rxopt.all)
1507                 opt_skb = skb_clone_and_charge_r(skb, sk);
1508
1509         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1510                 struct dst_entry *dst;
1511
1512                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1513                                                 lockdep_sock_is_held(sk));
1514
1515                 sock_rps_save_rxhash(sk, skb);
1516                 sk_mark_napi_id(sk, skb);
1517                 if (dst) {
1518                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1519                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1520                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1521                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1522                                 dst_release(dst);
1523                         }
1524                 }
1525
1526                 tcp_rcv_established(sk, skb);
1527                 if (opt_skb)
1528                         goto ipv6_pktoptions;
1529                 return 0;
1530         }
1531
1532         if (tcp_checksum_complete(skb))
1533                 goto csum_err;
1534
1535         if (sk->sk_state == TCP_LISTEN) {
1536                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1537
1538                 if (!nsk)
1539                         goto discard;
1540
1541                 if (nsk != sk) {
1542                         if (tcp_child_process(sk, nsk, skb))
1543                                 goto reset;
1544                         if (opt_skb)
1545                                 __kfree_skb(opt_skb);
1546                         return 0;
1547                 }
1548         } else
1549                 sock_rps_save_rxhash(sk, skb);
1550
1551         if (tcp_rcv_state_process(sk, skb))
1552                 goto reset;
1553         if (opt_skb)
1554                 goto ipv6_pktoptions;
1555         return 0;
1556
1557 reset:
1558         tcp_v6_send_reset(sk, skb);
1559 discard:
1560         if (opt_skb)
1561                 __kfree_skb(opt_skb);
1562         kfree_skb(skb);
1563         return 0;
1564 csum_err:
1565         trace_tcp_bad_csum(skb);
1566         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1567         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1568         goto discard;
1569
1570
1571 ipv6_pktoptions:
1572         /* What does this block check? We latch opt_skb only when:
1573
1574            1. The skb was enqueued by tcp.
1575            2. The skb was added to the tail of the read queue, not out of order.
1576            3. The socket is not in a passive state.
1577            4. Finally, it really contains options the user wants to receive.
1578          */
1579         tp = tcp_sk(sk);
1580         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1581             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1582                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1583                         np->mcast_oif = tcp_v6_iif(opt_skb);
1584                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1585                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1586                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1587                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1588                 if (np->repflow)
1589                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1590                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1591                         tcp_v6_restore_cb(opt_skb);
1592                         opt_skb = xchg(&np->pktoptions, opt_skb);
1593                 } else {
1594                         __kfree_skb(opt_skb);
1595                         opt_skb = xchg(&np->pktoptions, NULL);
1596                 }
1597         }
1598
1599         kfree_skb(opt_skb);
1600         return 0;
1601 }
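
/* Illustrative userspace sketch (compiled out, not kernel code) of the
 * API served by the ipv6_pktoptions block above: enable a rxopt bit such
 * as IPV6_RECVPKTINFO so the kernel latches options, then fetch the
 * latched control data with getsockopt(IPV6_PKTOPTIONS). Error handling
 * is trimmed for brevity.
 */
#if 0
#define _GNU_SOURCE		/* struct in6_pktinfo in glibc headers */
#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IPV6_PKTOPTIONS
#define IPV6_PKTOPTIONS 50	/* from <linux/in6.h>; glibc may omit it */
#endif

static void dump_latched_pktinfo(int fd)
{
	char buf[256];
	socklen_t len = sizeof(buf);
	struct msghdr msg = { .msg_control = buf };
	struct cmsghdr *cmsg;
	int on = 1;

	/* Makes np->rxopt.all nonzero, so tcp_v6_do_rcv() clones the
	 * option skb and the block above latches it.
	 */
	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));

	if (getsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, buf, &len) < 0)
		return;
	msg.msg_controllen = len;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_IPV6 &&
		    cmsg->cmsg_type == IPV6_PKTINFO) {
			const struct in6_pktinfo *pi =
				(const struct in6_pktinfo *)CMSG_DATA(cmsg);

			printf("last segment arrived on ifindex %u\n",
			       pi->ipi6_ifindex);
		}
	}
}
#endif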
1602
1603 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1604                            const struct tcphdr *th)
1605 {
1606         /* This is tricky: we move IP6CB to its correct location inside
1607          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1608          * _decode_session6() uses IP6CB().
1609          * barrier() makes sure compiler won't play aliasing games.
1610          */
1611         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1612                 sizeof(struct inet6_skb_parm));
1613         barrier();
1614
1615         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1616         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1617                                     skb->len - th->doff*4);
1618         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1619         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1620         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1621         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1622         TCP_SKB_CB(skb)->sacked = 0;
1623         TCP_SKB_CB(skb)->has_rxtstamp =
1624                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1625 }
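
/* Worked example (compiled out) of the end_seq arithmetic above: SYN and
 * FIN each consume one sequence number, and th->doff counts 32-bit words,
 * so for seq = 1000, syn = 0, fin = 1, skb->len = 120 and doff = 5 the
 * payload is 120 - 5 * 4 = 100 bytes and
 * end_seq = 1000 + 0 + 1 + 100 = 1101.
 */
#if 0
static u32 example_end_seq(u32 seq, u8 syn, u8 fin, unsigned int len,
			   unsigned int doff)
{
	return seq + syn + fin + (len - doff * 4);
}
#endif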
1626
1627 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1628 {
1629         struct sk_buff *skb_to_free;
1630         int sdif = inet6_sdif(skb);
1631         int dif = inet6_iif(skb);
1632         const struct tcphdr *th;
1633         const struct ipv6hdr *hdr;
1634         bool refcounted;
1635         struct sock *sk;
1636         int ret;
1637         struct net *net = dev_net(skb->dev);
1638
1639         if (skb->pkt_type != PACKET_HOST)
1640                 goto discard_it;
1641
1642         /*
1643          *      Count it even if it's bad.
1644          */
1645         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1646
1647         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1648                 goto discard_it;
1649
1650         th = (const struct tcphdr *)skb->data;
1651
1652         if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1653                 goto bad_packet;
1654         if (!pskb_may_pull(skb, th->doff*4))
1655                 goto discard_it;
1656
1657         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1658                 goto csum_error;
1659
1660         th = (const struct tcphdr *)skb->data;
1661         hdr = ipv6_hdr(skb);
1662
1663 lookup:
1664         sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1665                                 th->source, th->dest, inet6_iif(skb), sdif,
1666                                 &refcounted);
1667         if (!sk)
1668                 goto no_tcp_socket;
1669
1670 process:
1671         if (sk->sk_state == TCP_TIME_WAIT)
1672                 goto do_time_wait;
1673
1674         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1675                 struct request_sock *req = inet_reqsk(sk);
1676                 bool req_stolen = false;
1677                 struct sock *nsk;
1678
1679                 sk = req->rsk_listener;
1680                 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1681                         sk_drops_add(sk, skb);
1682                         reqsk_put(req);
1683                         goto discard_it;
1684                 }
1685                 if (tcp_checksum_complete(skb)) {
1686                         reqsk_put(req);
1687                         goto csum_error;
1688                 }
1689                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1690                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1691                         if (!nsk) {
1692                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1693                                 goto lookup;
1694                         }
1695                         sk = nsk;
1696                         /* reuseport_migrate_sock() already holds one
1697                          * sk_refcnt on the socket it returned.
1698                          */
1699                 } else {
1700                         sock_hold(sk);
1701                 }
1702                 refcounted = true;
1703                 nsk = NULL;
1704                 if (!tcp_filter(sk, skb)) {
1705                         th = (const struct tcphdr *)skb->data;
1706                         hdr = ipv6_hdr(skb);
1707                         tcp_v6_fill_cb(skb, hdr, th);
1708                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1709                 }
1710                 if (!nsk) {
1711                         reqsk_put(req);
1712                         if (req_stolen) {
1713                                 /* Another cpu got exclusive access to req
1714                                  * and created a full-blown socket.
1715                                  * Try to feed this packet to this socket
1716                                  * instead of discarding it.
1717                                  */
1718                                 tcp_v6_restore_cb(skb);
1719                                 sock_put(sk);
1720                                 goto lookup;
1721                         }
1722                         goto discard_and_relse;
1723                 }
1724                 if (nsk == sk) {
1725                         reqsk_put(req);
1726                         tcp_v6_restore_cb(skb);
1727                 } else if (tcp_child_process(sk, nsk, skb)) {
1728                         tcp_v6_send_reset(nsk, skb);
1729                         goto discard_and_relse;
1730                 } else {
1731                         sock_put(sk);
1732                         return 0;
1733                 }
1734         }
1735         if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1736                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1737                 goto discard_and_relse;
1738         }
1739
1740         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1741                 goto discard_and_relse;
1742
1743         if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1744                 goto discard_and_relse;
1745
1746         if (tcp_filter(sk, skb))
1747                 goto discard_and_relse;
1748         th = (const struct tcphdr *)skb->data;
1749         hdr = ipv6_hdr(skb);
1750         tcp_v6_fill_cb(skb, hdr, th);
1751
1752         skb->dev = NULL;
1753
1754         if (sk->sk_state == TCP_LISTEN) {
1755                 ret = tcp_v6_do_rcv(sk, skb);
1756                 goto put_and_return;
1757         }
1758
1759         sk_incoming_cpu_update(sk);
1760
1761         bh_lock_sock_nested(sk);
1762         tcp_segs_in(tcp_sk(sk), skb);
1763         ret = 0;
1764         if (!sock_owned_by_user(sk)) {
1765                 skb_to_free = sk->sk_rx_skb_cache;
1766                 sk->sk_rx_skb_cache = NULL;
1767                 ret = tcp_v6_do_rcv(sk, skb);
1768         } else {
1769                 if (tcp_add_backlog(sk, skb))
1770                         goto discard_and_relse;
1771                 skb_to_free = NULL;
1772         }
1773         bh_unlock_sock(sk);
1774         if (skb_to_free)
1775                 __kfree_skb(skb_to_free);
1776 put_and_return:
1777         if (refcounted)
1778                 sock_put(sk);
1779         return ret ? -1 : 0;
1780
1781 no_tcp_socket:
1782         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1783                 goto discard_it;
1784
1785         tcp_v6_fill_cb(skb, hdr, th);
1786
1787         if (tcp_checksum_complete(skb)) {
1788 csum_error:
1789                 trace_tcp_bad_csum(skb);
1790                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1791 bad_packet:
1792                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1793         } else {
1794                 tcp_v6_send_reset(NULL, skb);
1795         }
1796
1797 discard_it:
1798         kfree_skb(skb);
1799         return 0;
1800
1801 discard_and_relse:
1802         sk_drops_add(sk, skb);
1803         if (refcounted)
1804                 sock_put(sk);
1805         goto discard_it;
1806
1807 do_time_wait:
1808         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1809                 inet_twsk_put(inet_twsk(sk));
1810                 goto discard_it;
1811         }
1812
1813         tcp_v6_fill_cb(skb, hdr, th);
1814
1815         if (tcp_checksum_complete(skb)) {
1816                 inet_twsk_put(inet_twsk(sk));
1817                 goto csum_error;
1818         }
1819
1820         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1821         case TCP_TW_SYN:
1822         {
1823                 struct sock *sk2;
1824
1825                 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1826                                             skb, __tcp_hdrlen(th),
1827                                             &ipv6_hdr(skb)->saddr, th->source,
1828                                             &ipv6_hdr(skb)->daddr,
1829                                             ntohs(th->dest),
1830                                             tcp_v6_iif_l3_slave(skb),
1831                                             sdif);
1832                 if (sk2) {
1833                         struct inet_timewait_sock *tw = inet_twsk(sk);
1834                         inet_twsk_deschedule_put(tw);
1835                         sk = sk2;
1836                         tcp_v6_restore_cb(skb);
1837                         refcounted = false;
1838                         goto process;
1839                 }
1840         }
1841                 /* to ACK */
1842                 fallthrough;
1843         case TCP_TW_ACK:
1844                 tcp_v6_timewait_ack(sk, skb);
1845                 break;
1846         case TCP_TW_RST:
1847                 tcp_v6_send_reset(sk, skb);
1848                 inet_twsk_deschedule_put(inet_twsk(sk));
1849                 goto discard_it;
1850         case TCP_TW_SUCCESS:
1851                 ;
1852         }
1853         goto discard_it;
1854 }
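
/* Illustrative userspace sketch (compiled out) of the knob behind the
 * min_hopcount drop in tcp_v6_rcv() above: IPV6_MINHOPCOUNT implements
 * the Generalized TTL Security Mechanism (RFC 5082). A directly
 * connected peer sends with hop limit 255; anything that was forwarded
 * arrives lower and is counted as LINUX_MIB_TCPMINTTLDROP.
 */
#if 0
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef IPV6_MINHOPCOUNT
#define IPV6_MINHOPCOUNT 73	/* from <linux/in6.h>; glibc may omit it */
#endif

static int enable_gtsm(int fd)
{
	int min_hops = 255;

	return setsockopt(fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT,
			  &min_hops, sizeof(min_hops));
}
#endif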
1855
1856 void tcp_v6_early_demux(struct sk_buff *skb)
1857 {
1858         const struct ipv6hdr *hdr;
1859         const struct tcphdr *th;
1860         struct sock *sk;
1861
1862         if (skb->pkt_type != PACKET_HOST)
1863                 return;
1864
1865         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1866                 return;
1867
1868         hdr = ipv6_hdr(skb);
1869         th = tcp_hdr(skb);
1870
1871         if (th->doff < sizeof(struct tcphdr) / 4)
1872                 return;
1873
1874         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1875         sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1876                                         &hdr->saddr, th->source,
1877                                         &hdr->daddr, ntohs(th->dest),
1878                                         inet6_iif(skb), inet6_sdif(skb));
1879         if (sk) {
1880                 skb->sk = sk;
1881                 skb->destructor = sock_edemux;
1882                 if (sk_fullsock(sk)) {
1883                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1884
1885                         if (dst)
1886                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1887                         if (dst &&
1888                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1889                                 skb_dst_set_noref(skb, dst);
1890                 }
1891         }
1892 }
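
/* Illustrative sketch (compiled out): tcp_v6_early_demux() only runs
 * when TCP early demux is enabled. The knob lives under the ipv4 sysctl
 * tree but, as far as I can tell, covers IPv6 as well.
 */
#if 0
#include <stdio.h>

static int set_tcp_early_demux(int enable)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_demux", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", enable ? 1 : 0);
	return fclose(f);
}
#endif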
1893
1894 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1895         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1896         .twsk_unique    = tcp_twsk_unique,
1897         .twsk_destructor = tcp_twsk_destructor,
1898 };
1899
1900 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1901 {
1902         struct ipv6_pinfo *np = inet6_sk(sk);
1903
1904         __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1905 }
1906
1907 const struct inet_connection_sock_af_ops ipv6_specific = {
1908         .queue_xmit        = inet6_csk_xmit,
1909         .send_check        = tcp_v6_send_check,
1910         .rebuild_header    = inet6_sk_rebuild_header,
1911         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1912         .conn_request      = tcp_v6_conn_request,
1913         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1914         .net_header_len    = sizeof(struct ipv6hdr),
1915         .net_frag_header_len = sizeof(struct frag_hdr),
1916         .setsockopt        = ipv6_setsockopt,
1917         .getsockopt        = ipv6_getsockopt,
1918         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1919         .sockaddr_len      = sizeof(struct sockaddr_in6),
1920         .mtu_reduced       = tcp_v6_mtu_reduced,
1921 };
1922
1923 #ifdef CONFIG_TCP_MD5SIG
1924 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1925         .md5_lookup     =       tcp_v6_md5_lookup,
1926         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1927         .md5_parse      =       tcp_v6_parse_md5_keys,
1928 };
1929 #endif
1930
1931 /*
1932  *      TCP over IPv4 via INET6 API
1933  */
1934 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1935         .queue_xmit        = ip_queue_xmit,
1936         .send_check        = tcp_v4_send_check,
1937         .rebuild_header    = inet_sk_rebuild_header,
1938         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1939         .conn_request      = tcp_v6_conn_request,
1940         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1941         .net_header_len    = sizeof(struct iphdr),
1942         .setsockopt        = ipv6_setsockopt,
1943         .getsockopt        = ipv6_getsockopt,
1944         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1945         .sockaddr_len      = sizeof(struct sockaddr_in6),
1946         .mtu_reduced       = tcp_v4_mtu_reduced,
1947 };
1948
1949 #ifdef CONFIG_TCP_MD5SIG
1950 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1951         .md5_lookup     =       tcp_v4_md5_lookup,
1952         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1953         .md5_parse      =       tcp_v6_parse_md5_keys,
1954 };
1955 #endif
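
/* Illustrative userspace sketch (compiled out) of the dual-stack case
 * the ipv6_mapped ops above exist for: with IPV6_V6ONLY cleared, one
 * AF_INET6 listener also accepts IPv4 clients, which then appear with
 * v4-mapped ::ffff:a.b.c.d addresses. The backlog of 128 is an
 * arbitrary example value.
 */
#if 0
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int dual_stack_listen(unsigned short port)
{
	struct sockaddr_in6 addr;
	int fd, off = 0;

	fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;

	/* 0 = accept both families; the default follows net.ipv6.bindv6only */
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));

	memset(&addr, 0, sizeof(addr));
	addr.sin6_family = AF_INET6;
	addr.sin6_addr = in6addr_any;
	addr.sin6_port = htons(port);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 128) < 0)
		return -1;
	return fd;
}
#endif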
1956
1957 /* NOTE: A lot of things are set to zero explicitly by the call to
1958  *       sk_alloc(), so they need not be done here.
1959  */
1960 static int tcp_v6_init_sock(struct sock *sk)
1961 {
1962         struct inet_connection_sock *icsk = inet_csk(sk);
1963
1964         tcp_init_sock(sk);
1965
1966         icsk->icsk_af_ops = &ipv6_specific;
1967
1968 #ifdef CONFIG_TCP_MD5SIG
1969         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1970 #endif
1971
1972         return 0;
1973 }
1974
1975 #ifdef CONFIG_PROC_FS
1976 /* Proc filesystem TCPv6 sock list dumping. */
1977 static void get_openreq6(struct seq_file *seq,
1978                          const struct request_sock *req, int i)
1979 {
1980         long ttd = req->rsk_timer.expires - jiffies;
1981         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1982         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1983
1984         if (ttd < 0)
1985                 ttd = 0;
1986
1987         seq_printf(seq,
1988                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1989                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1990                    i,
1991                    src->s6_addr32[0], src->s6_addr32[1],
1992                    src->s6_addr32[2], src->s6_addr32[3],
1993                    inet_rsk(req)->ir_num,
1994                    dest->s6_addr32[0], dest->s6_addr32[1],
1995                    dest->s6_addr32[2], dest->s6_addr32[3],
1996                    ntohs(inet_rsk(req)->ir_rmt_port),
1997                    TCP_SYN_RECV,
1998                    0, 0, /* could print option size, but that is af dependent. */
1999                    1,   /* timers active (only the expire timer) */
2000                    jiffies_to_clock_t(ttd),
2001                    req->num_timeout,
2002                    from_kuid_munged(seq_user_ns(seq),
2003                                     sock_i_uid(req->rsk_listener)),
2004                    0,  /* non standard timer */
2005                    0, /* open_requests have no inode */
2006                    0, req);
2007 }
2008
2009 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2010 {
2011         const struct in6_addr *dest, *src;
2012         __u16 destp, srcp;
2013         int timer_active;
2014         unsigned long timer_expires;
2015         const struct inet_sock *inet = inet_sk(sp);
2016         const struct tcp_sock *tp = tcp_sk(sp);
2017         const struct inet_connection_sock *icsk = inet_csk(sp);
2018         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2019         int rx_queue;
2020         int state;
2021
2022         dest  = &sp->sk_v6_daddr;
2023         src   = &sp->sk_v6_rcv_saddr;
2024         destp = ntohs(inet->inet_dport);
2025         srcp  = ntohs(inet->inet_sport);
2026
2027         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2028             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2029             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2030                 timer_active    = 1;
2031                 timer_expires   = icsk->icsk_timeout;
2032         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2033                 timer_active    = 4;
2034                 timer_expires   = icsk->icsk_timeout;
2035         } else if (timer_pending(&sp->sk_timer)) {
2036                 timer_active    = 2;
2037                 timer_expires   = sp->sk_timer.expires;
2038         } else {
2039                 timer_active    = 0;
2040                 timer_expires = jiffies;
2041         }
2042
2043         state = inet_sk_state_load(sp);
2044         if (state == TCP_LISTEN)
2045                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2046         else
2047                 /* Because we don't lock the socket,
2048                  * we might find a transient negative value.
2049                  */
2050                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2051                                       READ_ONCE(tp->copied_seq), 0);
2052
2053         seq_printf(seq,
2054                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2055                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2056                    i,
2057                    src->s6_addr32[0], src->s6_addr32[1],
2058                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2059                    dest->s6_addr32[0], dest->s6_addr32[1],
2060                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2061                    state,
2062                    READ_ONCE(tp->write_seq) - tp->snd_una,
2063                    rx_queue,
2064                    timer_active,
2065                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2066                    icsk->icsk_retransmits,
2067                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2068                    icsk->icsk_probes_out,
2069                    sock_i_ino(sp),
2070                    refcount_read(&sp->sk_refcnt), sp,
2071                    jiffies_to_clock_t(icsk->icsk_rto),
2072                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2073                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2074                    tcp_snd_cwnd(tp),
2075                    state == TCP_LISTEN ?
2076                         fastopenq->max_qlen :
2077                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2078                    );
2079 }
2080
2081 static void get_timewait6_sock(struct seq_file *seq,
2082                                struct inet_timewait_sock *tw, int i)
2083 {
2084         long delta = tw->tw_timer.expires - jiffies;
2085         const struct in6_addr *dest, *src;
2086         __u16 destp, srcp;
2087
2088         dest = &tw->tw_v6_daddr;
2089         src  = &tw->tw_v6_rcv_saddr;
2090         destp = ntohs(tw->tw_dport);
2091         srcp  = ntohs(tw->tw_sport);
2092
2093         seq_printf(seq,
2094                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2095                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2096                    i,
2097                    src->s6_addr32[0], src->s6_addr32[1],
2098                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2099                    dest->s6_addr32[0], dest->s6_addr32[1],
2100                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2101                    tw->tw_substate, 0, 0,
2102                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2103                    refcount_read(&tw->tw_refcnt), tw);
2104 }
2105
2106 static int tcp6_seq_show(struct seq_file *seq, void *v)
2107 {
2108         struct tcp_iter_state *st;
2109         struct sock *sk = v;
2110
2111         if (v == SEQ_START_TOKEN) {
2112                 seq_puts(seq,
2113                          "  sl  "
2114                          "local_address                         "
2115                          "remote_address                        "
2116                          "st tx_queue rx_queue tr tm->when retrnsmt"
2117                          "   uid  timeout inode\n");
2118                 goto out;
2119         }
2120         st = seq->private;
2121
2122         if (sk->sk_state == TCP_TIME_WAIT)
2123                 get_timewait6_sock(seq, v, st->num);
2124         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2125                 get_openreq6(seq, v, st->num);
2126         else
2127                 get_tcp6_sock(seq, v, st->num);
2128 out:
2129         return 0;
2130 }
2131
2132 static const struct seq_operations tcp6_seq_ops = {
2133         .show           = tcp6_seq_show,
2134         .start          = tcp_seq_start,
2135         .next           = tcp_seq_next,
2136         .stop           = tcp_seq_stop,
2137 };
2138
2139 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2140         .family         = AF_INET6,
2141 };
2142
2143 int __net_init tcp6_proc_init(struct net *net)
2144 {
2145         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2146                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2147                 return -ENOMEM;
2148         return 0;
2149 }
2150
2151 void tcp6_proc_exit(struct net *net)
2152 {
2153         remove_proc_entry("tcp6", net->proc_net);
2154 }
2155 #endif
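
/* Illustrative userspace sketch (compiled out) of consuming the
 * /proc/net/tcp6 format emitted by get_tcp6_sock() above. Each address
 * is the four raw s6_addr32 words printed with %08X, so writing the
 * parsed host integers straight back into s6_addr32 recreates the
 * original byte pattern on the machine that produced the file.
 */
#if 0
#define _DEFAULT_SOURCE		/* s6_addr32 in glibc headers */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

static void dump_tcp6_sockets(void)
{
	char line[512], src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
	unsigned int a[4], b[4], sp, dp, st;
	struct in6_addr s, d;
	FILE *f;
	int i;

	f = fopen("/proc/net/tcp6", "r");
	if (!f)
		return;
	fgets(line, sizeof(line), f);	/* skip the header line */
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, " %*d: %8X%8X%8X%8X:%4X %8X%8X%8X%8X:%4X %2X",
			   &a[0], &a[1], &a[2], &a[3], &sp,
			   &b[0], &b[1], &b[2], &b[3], &dp, &st) != 11)
			continue;
		for (i = 0; i < 4; i++) {
			s.s6_addr32[i] = a[i];
			d.s6_addr32[i] = b[i];
		}
		inet_ntop(AF_INET6, &s, src, sizeof(src));
		inet_ntop(AF_INET6, &d, dst, sizeof(dst));
		printf("%s:%u -> %s:%u state %#x\n", src, sp, dst, dp, st);
	}
	fclose(f);
}
#endif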
2156
2157 struct proto tcpv6_prot = {
2158         .name                   = "TCPv6",
2159         .owner                  = THIS_MODULE,
2160         .close                  = tcp_close,
2161         .pre_connect            = tcp_v6_pre_connect,
2162         .connect                = tcp_v6_connect,
2163         .disconnect             = tcp_disconnect,
2164         .accept                 = inet_csk_accept,
2165         .ioctl                  = tcp_ioctl,
2166         .init                   = tcp_v6_init_sock,
2167         .destroy                = tcp_v4_destroy_sock,
2168         .shutdown               = tcp_shutdown,
2169         .setsockopt             = tcp_setsockopt,
2170         .getsockopt             = tcp_getsockopt,
2171         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2172         .keepalive              = tcp_set_keepalive,
2173         .recvmsg                = tcp_recvmsg,
2174         .sendmsg                = tcp_sendmsg,
2175         .sendpage               = tcp_sendpage,
2176         .backlog_rcv            = tcp_v6_do_rcv,
2177         .release_cb             = tcp_release_cb,
2178         .hash                   = inet6_hash,
2179         .unhash                 = inet_unhash,
2180         .get_port               = inet_csk_get_port,
2181 #ifdef CONFIG_BPF_SYSCALL
2182         .psock_update_sk_prot   = tcp_bpf_update_proto,
2183 #endif
2184         .enter_memory_pressure  = tcp_enter_memory_pressure,
2185         .leave_memory_pressure  = tcp_leave_memory_pressure,
2186         .stream_memory_free     = tcp_stream_memory_free,
2187         .sockets_allocated      = &tcp_sockets_allocated,
2188         .memory_allocated       = &tcp_memory_allocated,
2189         .memory_pressure        = &tcp_memory_pressure,
2190         .orphan_count           = &tcp_orphan_count,
2191         .sysctl_mem             = sysctl_tcp_mem,
2192         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2193         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2194         .max_header             = MAX_TCP_HEADER,
2195         .obj_size               = sizeof(struct tcp6_sock),
2196         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2197         .twsk_prot              = &tcp6_timewait_sock_ops,
2198         .rsk_prot               = &tcp6_request_sock_ops,
2199         .h.hashinfo             = &tcp_hashinfo,
2200         .no_autobind            = true,
2201         .diag_destroy           = tcp_abort,
2202 };
2203 EXPORT_SYMBOL_GPL(tcpv6_prot);
2204
2205 static const struct inet6_protocol tcpv6_protocol = {
2206         .handler        =       tcp_v6_rcv,
2207         .err_handler    =       tcp_v6_err,
2208         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2209 };
2210
2211 static struct inet_protosw tcpv6_protosw = {
2212         .type           =       SOCK_STREAM,
2213         .protocol       =       IPPROTO_TCP,
2214         .prot           =       &tcpv6_prot,
2215         .ops            =       &inet6_stream_ops,
2216         .flags          =       INET_PROTOSW_PERMANENT |
2217                                 INET_PROTOSW_ICSK,
2218 };
2219
2220 static int __net_init tcpv6_net_init(struct net *net)
2221 {
2222         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2223                                     SOCK_RAW, IPPROTO_TCP, net);
2224 }
2225
2226 static void __net_exit tcpv6_net_exit(struct net *net)
2227 {
2228         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2229 }
2230
2231 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2232 {
2233         inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2234 }
2235
2236 static struct pernet_operations tcpv6_net_ops = {
2237         .init       = tcpv6_net_init,
2238         .exit       = tcpv6_net_exit,
2239         .exit_batch = tcpv6_net_exit_batch,
2240 };
2241
2242 int __init tcpv6_init(void)
2243 {
2244         int ret;
2245
2246         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2247         if (ret)
2248                 goto out;
2249
2250         /* register inet6 protocol */
2251         ret = inet6_register_protosw(&tcpv6_protosw);
2252         if (ret)
2253                 goto out_tcpv6_protocol;
2254
2255         ret = register_pernet_subsys(&tcpv6_net_ops);
2256         if (ret)
2257                 goto out_tcpv6_protosw;
2258
2259         ret = mptcpv6_init();
2260         if (ret)
2261                 goto out_tcpv6_pernet_subsys;
2262
2263 out:
2264         return ret;
2265
2266 out_tcpv6_pernet_subsys:
2267         unregister_pernet_subsys(&tcpv6_net_ops);
2268 out_tcpv6_protosw:
2269         inet6_unregister_protosw(&tcpv6_protosw);
2270 out_tcpv6_protocol:
2271         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2272         goto out;
2273 }
2274
2275 void tcpv6_exit(void)
2276 {
2277         unregister_pernet_subsys(&tcpv6_net_ops);
2278         inet6_unregister_protosw(&tcpv6_protosw);
2279         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2280 }