// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support the IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
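
/* Editorial sketch (not a definition from this file): the constant offset
 * above is valid because struct tcp6_sock places its ipv6_pinfo as the
 * final member, roughly:
 *
 *      struct tcp6_sock {
 *              struct tcp_sock   tcp;
 *              struct ipv6_pinfo inet6;        <- tcp_inet6_sk() returns this
 *      };
 */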

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}
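
/* Editorial note: the dst and cookie cached here are consumed on the
 * established fast path in tcp_v6_do_rcv() below, where ip6_dst_check()
 * revalidates the cached route against sk_rx_dst_cookie before reuse.
 */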

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}
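
/* Editorial note: the two helpers above feed .init_seq and .init_ts_off of
 * tcp_request_sock_ipv6_ops further down. secure_tcpv6_seq() derives a
 * hard-to-predict initial sequence number from the address/port 4-tuple
 * (in the spirit of RFC 6528), and secure_tcpv6_ts_off() derives a
 * per-destination timestamp offset so timestamps do not expose one global
 * clock to every peer.
 */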

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and intended to
         * prevent the BPF program called below from accessing bytes that are
         * outside of the bound specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}
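
/* Editorial note: .pre_connect is invoked from the generic
 * __inet_stream_connect() path before tcp_v6_connect() runs, giving a
 * BPF_CGROUP_INET6_CONNECT program the chance to inspect or rewrite the
 * destination address while the socket lock is already held.
 */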

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                saddr = &fl6.saddr;

                err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
                if (err)
                        goto failure;
        }

        /* set the source address */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        inet_bhash2_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}
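
/* Editorial usage sketch (hypothetical userspace code, not part of this
 * file): connecting an AF_INET6 socket to a v4-mapped address such as
 * ::ffff:192.0.2.1 takes the IPV6_ADDR_MAPPED branch above, which quietly
 * switches the socket over to the IPv4 af_ops:
 *
 *      int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *      struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *                                 .sin6_port   = htons(80) };
 *      inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *      connect(fd, (struct sockaddr *)&sa, sizeof(sa));  // -> tcp_v4_connect()
 */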

static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * Check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check if this ICMP message allows reverting the RTO backoff
                 * (see RFC 6069).
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
                               tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* ok to reference set/not set outside of rcu;
                 * right now device MUST be an L3 master
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}
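
/* Editorial sketch: the tcp6_pseudohdr filled in above mirrors the RFC 8200
 * upper-layer pseudo-header; its layout (declared elsewhere, reproduced
 * here from memory for reference) is roughly:
 *
 *      struct tcp6_pseudohdr {
 *              struct in6_addr saddr;
 *              struct in6_addr daddr;
 *              __be32          len;
 *              __be32          protocol;       // including zero padding
 *      };
 */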

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for establish/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link locals have meaning */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = sk->sk_mark;
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even when the reply is an RST;
         * the underlying function uses it to retrieve the network namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}
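
/* Editorial note: tcp_v6_send_response() is the single transmit helper for
 * stateless replies. tcp_v6_send_reset() below calls it with rst == 1 and
 * tcp_v6_send_ack() with rst == 0, so RSTs and timewait/request ACKs share
 * the option, flow-label and route handling above.
 */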

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked against the md5 hash with the
                 * key we find; no RST is generated if the md5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set, means packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                        txhash = sk->sk_hash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
                        tcp_rsk(req)->txhash);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}
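
/* Editorial note: cookie_v6_check() above runs only for a bare ACK
 * (!th->syn), i.e. the final step of a handshake whose SYN was answered
 * with a syncookie because the request queue had overflowed; for a SYN the
 * helper returns the listener unchanged and normal processing follows.
 */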

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp code
                   worked with the IPv6 icsk.icsk_af_ops.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever,
           but we do one more thing here: reattach optmem
           to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions)
                                tcp_v6_restore_cb(newnp->pktoptions);
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}
1419
1420 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1421                                                            u32));
1422 /* The socket must have it's spinlock held when we get
1423  * here, unless it is a TCP_LISTEN socket.
1424  *
1425  * We have a potential double-lock case here, so even when
1426  * doing backlog processing we use the BH locking scheme.
1427  * This is because we cannot sleep with the original spinlock
1428  * held.
1429  */
1430 INDIRECT_CALLABLE_SCOPE
1431 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1432 {
1433         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1434         struct sk_buff *opt_skb = NULL;
1435         enum skb_drop_reason reason;
1436         struct tcp_sock *tp;
1437
1438         /* Imagine: socket is IPv6. IPv4 packet arrives,
1439            goes to IPv4 receive handler and backlogged.
1440            From backlog it always goes here. Kerboom...
1441            Fortunately, tcp_rcv_established and rcv_established
1442            handle them correctly, but it is not case with
1443            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1444          */
1445
1446         if (skb->protocol == htons(ETH_P_IP))
1447                 return tcp_v4_do_rcv(sk, skb);
1448
1449         /*
1450          *      socket locking is here for SMP purposes as backlog rcv
1451          *      is currently called with bh processing disabled.
1452          */
1453
1454         /* Do Stevens' IPV6_PKTOPTIONS.
1455
1456            Yes, guys, this is the only place in our code where we
1457            can implement it without affecting IPv4.
1458            The rest of the code is protocol independent,
1459            and I do not like the idea of uglifying IPv4.
1460
1461            Actually, the whole idea behind IPV6_PKTOPTIONS
1462            does not look very well thought out. For now we latch
1463            the options received in the last packet enqueued
1464            by tcp. Feel free to propose a better solution.
1465                                                --ANK (980728)
1466          */
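        /* Editor's note, illustrative only: a receiver typically arms this
         * latching from user space with the standard RFC 3542 options, e.g.
         * (hypothetical sketch, error handling omitted):
         *
         *	int on = 1;
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));
         *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &on, sizeof(on));
         *
         * Those calls set the np->rxopt bits tested below, which make us
         * clone the most recently received skb so its options can be
         * fetched later.
         */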
1467         if (np->rxopt.all)
1468                 opt_skb = skb_clone_and_charge_r(skb, sk);
1469
1470         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1471         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1472                 struct dst_entry *dst;
1473
1474                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1475                                                 lockdep_sock_is_held(sk));
1476
1477                 sock_rps_save_rxhash(sk, skb);
1478                 sk_mark_napi_id(sk, skb);
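                /* Re-validate the cached rx route: drop it if this packet
                 * arrived on a different device, or if the dst has been
                 * invalidated (the cookie check fails).
                 */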
1479                 if (dst) {
1480                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1481                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1482                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1483                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1484                                 dst_release(dst);
1485                         }
1486                 }
1487
1488                 tcp_rcv_established(sk, skb);
1489                 if (opt_skb)
1490                         goto ipv6_pktoptions;
1491                 return 0;
1492         }
1493
1494         if (tcp_checksum_complete(skb))
1495                 goto csum_err;
1496
1497         if (sk->sk_state == TCP_LISTEN) {
1498                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1499
1500                 if (!nsk)
1501                         goto discard;
1502
1503                 if (nsk != sk) {
1504                         if (tcp_child_process(sk, nsk, skb))
1505                                 goto reset;
1506                         if (opt_skb)
1507                                 __kfree_skb(opt_skb);
1508                         return 0;
1509                 }
1510         } else {
1511                 sock_rps_save_rxhash(sk, skb);
1512         }
1512
1513         if (tcp_rcv_state_process(sk, skb))
1514                 goto reset;
1515         if (opt_skb)
1516                 goto ipv6_pktoptions;
1517         return 0;
1518
1519 reset:
1520         tcp_v6_send_reset(sk, skb);
1521 discard:
1522         if (opt_skb)
1523                 __kfree_skb(opt_skb);
1524         kfree_skb_reason(skb, reason);
1525         return 0;
1526 csum_err:
1527         reason = SKB_DROP_REASON_TCP_CSUM;
1528         trace_tcp_bad_csum(skb);
1529         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1531         goto discard;
1532
1534 ipv6_pktoptions:
1535         /* What is going on here? We latch the options only when:
1536
1537            1. the skb was enqueued by tcp,
1538            2. the skb was added to the tail of the read queue, not out of order,
1539            3. the socket is not in a passive state, and
1540            4. the skb really contains options which the user wants to receive.
1541          */
1542         tp = tcp_sk(sk);
1543         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1544             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1545                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1546                         np->mcast_oif = tcp_v6_iif(opt_skb);
1547                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1548                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1549                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1550                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1551                 if (np->repflow)
1552                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1553                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1554                         tcp_v6_restore_cb(opt_skb);
1555                         opt_skb = xchg(&np->pktoptions, opt_skb);
1556                 } else {
1557                         __kfree_skb(opt_skb);
1558                         opt_skb = xchg(&np->pktoptions, NULL);
1559                 }
1560         }
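        /* At this point opt_skb is either the previously latched options
         * skb, the unused clone, or NULL; consume_skb() accepts NULL, so
         * a single call below covers all paths.
         */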
1561
1562         consume_skb(opt_skb);
1563         return 0;
1564 }
1565
1566 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1567                            const struct tcphdr *th)
1568 {
1569         /* This is tricky: we move IP6CB to its correct location inside
1570          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1571          * _decode_session6() uses IP6CB().
1572          * barrier() makes sure the compiler won't play aliasing games.
1573          */
1574         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1575                 sizeof(struct inet6_skb_parm));
1576         barrier();
1577
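        /* end_seq covers the payload plus one sequence number each for SYN
         * and FIN, since both consume sequence space; th->doff is the header
         * length in 32-bit words.
         */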
1578         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1579         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1580                                     skb->len - th->doff*4);
1581         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1582         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1583         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1584         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1585         TCP_SKB_CB(skb)->sacked = 0;
1586         TCP_SKB_CB(skb)->has_rxtstamp =
1587                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1588 }
1589
1590 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1591 {
1592         enum skb_drop_reason drop_reason;
1593         int sdif = inet6_sdif(skb);
1594         int dif = inet6_iif(skb);
1595         const struct tcphdr *th;
1596         const struct ipv6hdr *hdr;
1597         bool refcounted;
1598         struct sock *sk;
1599         int ret;
1600         struct net *net = dev_net(skb->dev);
1601
1602         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1603         if (skb->pkt_type != PACKET_HOST)
1604                 goto discard_it;
1605
1606         /*
1607          *      Count it even if it's bad.
1608          */
1609         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1610
1611         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1612                 goto discard_it;
1613
1614         th = (const struct tcphdr *)skb->data;
1615
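        /* th->doff counts the TCP header in 32-bit words; anything below the
         * five-word minimum header is malformed.
         */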
1616         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1617                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1618                 goto bad_packet;
1619         }
1620         if (!pskb_may_pull(skb, th->doff*4))
1621                 goto discard_it;
1622
1623         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1624                 goto csum_error;
1625
1626         th = (const struct tcphdr *)skb->data;
1627         hdr = ipv6_hdr(skb);
1628
1629 lookup:
1630         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1631                                 th->source, th->dest, inet6_iif(skb), sdif,
1632                                 &refcounted);
1633         if (!sk)
1634                 goto no_tcp_socket;
1635
1636 process:
1637         if (sk->sk_state == TCP_TIME_WAIT)
1638                 goto do_time_wait;
1639
1640         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1641                 struct request_sock *req = inet_reqsk(sk);
1642                 bool req_stolen = false;
1643                 struct sock *nsk;
1644
1645                 sk = req->rsk_listener;
1646                 drop_reason = tcp_inbound_md5_hash(sk, skb,
1647                                                    &hdr->saddr, &hdr->daddr,
1648                                                    AF_INET6, dif, sdif);
1649                 if (drop_reason) {
1650                         sk_drops_add(sk, skb);
1651                         reqsk_put(req);
1652                         goto discard_it;
1653                 }
1654                 if (tcp_checksum_complete(skb)) {
1655                         reqsk_put(req);
1656                         goto csum_error;
1657                 }
1658                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1659                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1660                         if (!nsk) {
1661                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1662                                 goto lookup;
1663                         }
1664                         sk = nsk;
1665                         /* reuseport_migrate_sock() has already taken one sk_refcnt
1666                          * before returning.
1667                          */
1668                 } else {
1669                         sock_hold(sk);
1670                 }
1671                 refcounted = true;
1672                 nsk = NULL;
1673                 if (!tcp_filter(sk, skb)) {
1674                         th = (const struct tcphdr *)skb->data;
1675                         hdr = ipv6_hdr(skb);
1676                         tcp_v6_fill_cb(skb, hdr, th);
1677                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1678                 } else {
1679                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1680                 }
1681                 if (!nsk) {
1682                         reqsk_put(req);
1683                         if (req_stolen) {
1684                         /* Another cpu got exclusive access to req
1685                          * and created a full-blown socket.
1686                          * Try to feed this packet to that socket
1687                          * instead of discarding it.
1688                          */
1689                                 tcp_v6_restore_cb(skb);
1690                                 sock_put(sk);
1691                                 goto lookup;
1692                         }
1693                         goto discard_and_relse;
1694                 }
1695                 if (nsk == sk) {
1696                         reqsk_put(req);
1697                         tcp_v6_restore_cb(skb);
1698                 } else if (tcp_child_process(sk, nsk, skb)) {
1699                         tcp_v6_send_reset(nsk, skb);
1700                         goto discard_and_relse;
1701                 } else {
1702                         sock_put(sk);
1703                         return 0;
1704                 }
1705         }
1706
1707         if (static_branch_unlikely(&ip6_min_hopcount)) {
1708                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1709                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1710                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1711                         goto discard_and_relse;
1712                 }
1713         }
1714
1715         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1716                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1717                 goto discard_and_relse;
1718         }
1719
1720         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1721                                            AF_INET6, dif, sdif);
1722         if (drop_reason)
1723                 goto discard_and_relse;
1724
1725         if (tcp_filter(sk, skb)) {
1726                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1727                 goto discard_and_relse;
1728         }
1729         th = (const struct tcphdr *)skb->data;
1730         hdr = ipv6_hdr(skb);
1731         tcp_v6_fill_cb(skb, hdr, th);
1732
1733         skb->dev = NULL;
1734
1735         if (sk->sk_state == TCP_LISTEN) {
1736                 ret = tcp_v6_do_rcv(sk, skb);
1737                 goto put_and_return;
1738         }
1739
1740         sk_incoming_cpu_update(sk);
1741
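        /* If the socket is owned by user space we must not process the
         * segment here; queue it on the backlog instead, to be run via
         * backlog_rcv (tcp_v6_do_rcv) when the owner releases the lock.
         */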
1742         bh_lock_sock_nested(sk);
1743         tcp_segs_in(tcp_sk(sk), skb);
1744         ret = 0;
1745         if (!sock_owned_by_user(sk)) {
1746                 ret = tcp_v6_do_rcv(sk, skb);
1747         } else {
1748                 if (tcp_add_backlog(sk, skb, &drop_reason))
1749                         goto discard_and_relse;
1750         }
1751         bh_unlock_sock(sk);
1752 put_and_return:
1753         if (refcounted)
1754                 sock_put(sk);
1755         return ret ? -1 : 0;
1756
1757 no_tcp_socket:
1758         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1759         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1760                 goto discard_it;
1761
1762         tcp_v6_fill_cb(skb, hdr, th);
1763
1764         if (tcp_checksum_complete(skb)) {
1765 csum_error:
1766                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1767                 trace_tcp_bad_csum(skb);
1768                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1769 bad_packet:
1770                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1771         } else {
1772                 tcp_v6_send_reset(NULL, skb);
1773         }
1774
1775 discard_it:
1776         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1777         kfree_skb_reason(skb, drop_reason);
1778         return 0;
1779
1780 discard_and_relse:
1781         sk_drops_add(sk, skb);
1782         if (refcounted)
1783                 sock_put(sk);
1784         goto discard_it;
1785
1786 do_time_wait:
1787         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1788                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1789                 inet_twsk_put(inet_twsk(sk));
1790                 goto discard_it;
1791         }
1792
1793         tcp_v6_fill_cb(skb, hdr, th);
1794
1795         if (tcp_checksum_complete(skb)) {
1796                 inet_twsk_put(inet_twsk(sk));
1797                 goto csum_error;
1798         }
1799
1800         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1801         case TCP_TW_SYN:
1802         {
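                /* A SYN hit a TIME-WAIT connection's tuple. If a matching
                 * listener exists, retire the timewait socket and reprocess
                 * the segment as a fresh connection attempt.
                 */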
1803                 struct sock *sk2;
1804
1805                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1806                                             skb, __tcp_hdrlen(th),
1807                                             &ipv6_hdr(skb)->saddr, th->source,
1808                                             &ipv6_hdr(skb)->daddr,
1809                                             ntohs(th->dest),
1810                                             tcp_v6_iif_l3_slave(skb),
1811                                             sdif);
1812                 if (sk2) {
1813                         struct inet_timewait_sock *tw = inet_twsk(sk);
1814                         inet_twsk_deschedule_put(tw);
1815                         sk = sk2;
1816                         tcp_v6_restore_cb(skb);
1817                         refcounted = false;
1818                         goto process;
1819                 }
1820         }
1821                 /* to ACK */
1822                 fallthrough;
1823         case TCP_TW_ACK:
1824                 tcp_v6_timewait_ack(sk, skb);
1825                 break;
1826         case TCP_TW_RST:
1827                 tcp_v6_send_reset(sk, skb);
1828                 inet_twsk_deschedule_put(inet_twsk(sk));
1829                 goto discard_it;
1830         case TCP_TW_SUCCESS:
1831                 ;
1832         }
1833         goto discard_it;
1834 }
1835
1836 void tcp_v6_early_demux(struct sk_buff *skb)
1837 {
1838         struct net *net = dev_net(skb->dev);
1839         const struct ipv6hdr *hdr;
1840         const struct tcphdr *th;
1841         struct sock *sk;
1842
1843         if (skb->pkt_type != PACKET_HOST)
1844                 return;
1845
1846         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1847                 return;
1848
1849         hdr = ipv6_hdr(skb);
1850         th = tcp_hdr(skb);
1851
1852         if (th->doff < sizeof(struct tcphdr) / 4)
1853                 return;
1854
1855         /* Note: We use inet6_iif() here, not tcp_v6_iif() */
1856         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1857                                         &hdr->saddr, th->source,
1858                                         &hdr->daddr, ntohs(th->dest),
1859                                         inet6_iif(skb), inet6_sdif(skb));
1860         if (sk) {
1861                 skb->sk = sk;
1862                 skb->destructor = sock_edemux;
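                /* For full sockets, reuse the cached rx route if it is still
                 * valid and was learned on the same incoming device.
                 */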
1863                 if (sk_fullsock(sk)) {
1864                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1865
1866                         if (dst)
1867                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1868                         if (dst &&
1869                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1870                                 skb_dst_set_noref(skb, dst);
1871                 }
1872         }
1873 }
1874
1875 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1876         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1877         .twsk_unique    = tcp_twsk_unique,
1878         .twsk_destructor = tcp_twsk_destructor,
1879 };
1880
1881 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1882 {
1883         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1884 }
1885
1886 const struct inet_connection_sock_af_ops ipv6_specific = {
1887         .queue_xmit        = inet6_csk_xmit,
1888         .send_check        = tcp_v6_send_check,
1889         .rebuild_header    = inet6_sk_rebuild_header,
1890         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1891         .conn_request      = tcp_v6_conn_request,
1892         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1893         .net_header_len    = sizeof(struct ipv6hdr),
1894         .net_frag_header_len = sizeof(struct frag_hdr),
1895         .setsockopt        = ipv6_setsockopt,
1896         .getsockopt        = ipv6_getsockopt,
1897         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1898         .sockaddr_len      = sizeof(struct sockaddr_in6),
1899         .mtu_reduced       = tcp_v6_mtu_reduced,
1900 };
1901
1902 #ifdef CONFIG_TCP_MD5SIG
1903 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1904         .md5_lookup     =       tcp_v6_md5_lookup,
1905         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1906         .md5_parse      =       tcp_v6_parse_md5_keys,
1907 };
1908 #endif
1909
1910 /*
1911  *      TCP over IPv4 via INET6 API
1912  */
1913 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1914         .queue_xmit        = ip_queue_xmit,
1915         .send_check        = tcp_v4_send_check,
1916         .rebuild_header    = inet_sk_rebuild_header,
1917         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1918         .conn_request      = tcp_v6_conn_request,
1919         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1920         .net_header_len    = sizeof(struct iphdr),
1921         .setsockopt        = ipv6_setsockopt,
1922         .getsockopt        = ipv6_getsockopt,
1923         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1924         .sockaddr_len      = sizeof(struct sockaddr_in6),
1925         .mtu_reduced       = tcp_v4_mtu_reduced,
1926 };
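/* Editor's note, illustrative only: the mapped ops above take effect when an
 * AF_INET6 socket ends up speaking IPv4, e.g. after user space connects to a
 * v4-mapped address (hypothetical sketch, error handling omitted):
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = { .sin6_family = AF_INET6,
 *				   .sin6_port = htons(80) };
 *	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * tcp_v6_connect() then switches icsk_af_ops over to ipv6_mapped.
 */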
1927
1928 #ifdef CONFIG_TCP_MD5SIG
1929 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1930         .md5_lookup     =       tcp_v4_md5_lookup,
1931         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1932         .md5_parse      =       tcp_v6_parse_md5_keys,
1933 };
1934 #endif
1935
1936 /* NOTE: A lot of things are set to zero explicitly by the call to
1937  *       sk_alloc(), so they need not be done here.
1938  */
1939 static int tcp_v6_init_sock(struct sock *sk)
1940 {
1941         struct inet_connection_sock *icsk = inet_csk(sk);
1942
1943         tcp_init_sock(sk);
1944
1945         icsk->icsk_af_ops = &ipv6_specific;
1946
1947 #ifdef CONFIG_TCP_MD5SIG
1948         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1949 #endif
1950
1951         return 0;
1952 }
1953
1954 static void tcp_v6_destroy_sock(struct sock *sk)
1955 {
1956         tcp_v4_destroy_sock(sk);
1957         inet6_destroy_sock(sk);
1958 }
1959
1960 #ifdef CONFIG_PROC_FS
1961 /* Proc filesystem TCPv6 sock list dumping. */
1962 static void get_openreq6(struct seq_file *seq,
1963                          const struct request_sock *req, int i)
1964 {
1965         long ttd = req->rsk_timer.expires - jiffies;
1966         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1967         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1968
1969         if (ttd < 0)
1970                 ttd = 0;
1971
1972         seq_printf(seq,
1973                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1974                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1975                    i,
1976                    src->s6_addr32[0], src->s6_addr32[1],
1977                    src->s6_addr32[2], src->s6_addr32[3],
1978                    inet_rsk(req)->ir_num,
1979                    dest->s6_addr32[0], dest->s6_addr32[1],
1980                    dest->s6_addr32[2], dest->s6_addr32[3],
1981                    ntohs(inet_rsk(req)->ir_rmt_port),
1982                    TCP_SYN_RECV,
1983                    0, 0, /* could print option size, but that is af dependent. */
1984                    1,   /* timers active (only the expire timer) */
1985                    jiffies_to_clock_t(ttd),
1986                    req->num_timeout,
1987                    from_kuid_munged(seq_user_ns(seq),
1988                                     sock_i_uid(req->rsk_listener)),
1989                    0,  /* non standard timer */
1990                    0, /* open_requests have no inode */
1991                    0, req);
1992 }
1993
1994 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1995 {
1996         const struct in6_addr *dest, *src;
1997         __u16 destp, srcp;
1998         int timer_active;
1999         unsigned long timer_expires;
2000         const struct inet_sock *inet = inet_sk(sp);
2001         const struct tcp_sock *tp = tcp_sk(sp);
2002         const struct inet_connection_sock *icsk = inet_csk(sp);
2003         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2004         int rx_queue;
2005         int state;
2006
2007         dest  = &sp->sk_v6_daddr;
2008         src   = &sp->sk_v6_rcv_saddr;
2009         destp = ntohs(inet->inet_dport);
2010         srcp  = ntohs(inet->inet_sport);
2011
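        /* Timer codes reported in the "tr" column: 1 retransmit/loss probe,
         * 2 keepalive (sk_timer), 4 zero-window probe, 0 no timer pending.
         */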
2012         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2013             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2014             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2015                 timer_active    = 1;
2016                 timer_expires   = icsk->icsk_timeout;
2017         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2018                 timer_active    = 4;
2019                 timer_expires   = icsk->icsk_timeout;
2020         } else if (timer_pending(&sp->sk_timer)) {
2021                 timer_active    = 2;
2022                 timer_expires   = sp->sk_timer.expires;
2023         } else {
2024                 timer_active    = 0;
2025                 timer_expires = jiffies;
2026         }
2027
2028         state = inet_sk_state_load(sp);
2029         if (state == TCP_LISTEN)
2030                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2031         else
2032                 /* Because we don't lock the socket,
2033                  * we might find a transient negative value.
2034                  */
2035                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2036                                       READ_ONCE(tp->copied_seq), 0);
2037
2038         seq_printf(seq,
2039                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2040                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2041                    i,
2042                    src->s6_addr32[0], src->s6_addr32[1],
2043                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2044                    dest->s6_addr32[0], dest->s6_addr32[1],
2045                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2046                    state,
2047                    READ_ONCE(tp->write_seq) - tp->snd_una,
2048                    rx_queue,
2049                    timer_active,
2050                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2051                    icsk->icsk_retransmits,
2052                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2053                    icsk->icsk_probes_out,
2054                    sock_i_ino(sp),
2055                    refcount_read(&sp->sk_refcnt), sp,
2056                    jiffies_to_clock_t(icsk->icsk_rto),
2057                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2058                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2059                    tcp_snd_cwnd(tp),
2060                    state == TCP_LISTEN ?
2061                         fastopenq->max_qlen :
2062                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2063                    );
2064 }
2065
2066 static void get_timewait6_sock(struct seq_file *seq,
2067                                struct inet_timewait_sock *tw, int i)
2068 {
2069         long delta = tw->tw_timer.expires - jiffies;
2070         const struct in6_addr *dest, *src;
2071         __u16 destp, srcp;
2072
2073         dest = &tw->tw_v6_daddr;
2074         src  = &tw->tw_v6_rcv_saddr;
2075         destp = ntohs(tw->tw_dport);
2076         srcp  = ntohs(tw->tw_sport);
2077
2078         seq_printf(seq,
2079                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2080                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2081                    i,
2082                    src->s6_addr32[0], src->s6_addr32[1],
2083                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2084                    dest->s6_addr32[0], dest->s6_addr32[1],
2085                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2086                    tw->tw_substate, 0, 0,
2087                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2088                    refcount_read(&tw->tw_refcnt), tw);
2089 }
2090
2091 static int tcp6_seq_show(struct seq_file *seq, void *v)
2092 {
2093         struct tcp_iter_state *st;
2094         struct sock *sk = v;
2095
2096         if (v == SEQ_START_TOKEN) {
2097                 seq_puts(seq,
2098                          "  sl  "
2099                          "local_address                         "
2100                          "remote_address                        "
2101                          "st tx_queue rx_queue tr tm->when retrnsmt"
2102                          "   uid  timeout inode\n");
2103                 goto out;
2104         }
2105         st = seq->private;
2106
2107         if (sk->sk_state == TCP_TIME_WAIT)
2108                 get_timewait6_sock(seq, v, st->num);
2109         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2110                 get_openreq6(seq, v, st->num);
2111         else
2112                 get_tcp6_sock(seq, v, st->num);
2113 out:
2114         return 0;
2115 }
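/* Editor's note, illustrative only: addresses are printed as four %08X words
 * of s6_addr32 and ports in hex, so on a little-endian machine a listener on
 * [::1]:22 would show up roughly as:
 *
 *	0: 00000000000000000000000001000000:0016 ... 0A ...
 *
 * where state 0A is TCP_LISTEN (remaining columns elided here).
 */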
2116
2117 static const struct seq_operations tcp6_seq_ops = {
2118         .show           = tcp6_seq_show,
2119         .start          = tcp_seq_start,
2120         .next           = tcp_seq_next,
2121         .stop           = tcp_seq_stop,
2122 };
2123
2124 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2125         .family         = AF_INET6,
2126 };
2127
2128 int __net_init tcp6_proc_init(struct net *net)
2129 {
2130         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2131                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2132                 return -ENOMEM;
2133         return 0;
2134 }
2135
2136 void tcp6_proc_exit(struct net *net)
2137 {
2138         remove_proc_entry("tcp6", net->proc_net);
2139 }
2140 #endif
2141
2142 struct proto tcpv6_prot = {
2143         .name                   = "TCPv6",
2144         .owner                  = THIS_MODULE,
2145         .close                  = tcp_close,
2146         .pre_connect            = tcp_v6_pre_connect,
2147         .connect                = tcp_v6_connect,
2148         .disconnect             = tcp_disconnect,
2149         .accept                 = inet_csk_accept,
2150         .ioctl                  = tcp_ioctl,
2151         .init                   = tcp_v6_init_sock,
2152         .destroy                = tcp_v6_destroy_sock,
2153         .shutdown               = tcp_shutdown,
2154         .setsockopt             = tcp_setsockopt,
2155         .getsockopt             = tcp_getsockopt,
2156         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2157         .keepalive              = tcp_set_keepalive,
2158         .recvmsg                = tcp_recvmsg,
2159         .sendmsg                = tcp_sendmsg,
2160         .sendpage               = tcp_sendpage,
2161         .backlog_rcv            = tcp_v6_do_rcv,
2162         .release_cb             = tcp_release_cb,
2163         .hash                   = inet6_hash,
2164         .unhash                 = inet_unhash,
2165         .get_port               = inet_csk_get_port,
2166         .put_port               = inet_put_port,
2167 #ifdef CONFIG_BPF_SYSCALL
2168         .psock_update_sk_prot   = tcp_bpf_update_proto,
2169 #endif
2170         .enter_memory_pressure  = tcp_enter_memory_pressure,
2171         .leave_memory_pressure  = tcp_leave_memory_pressure,
2172         .stream_memory_free     = tcp_stream_memory_free,
2173         .sockets_allocated      = &tcp_sockets_allocated,
2174
2175         .memory_allocated       = &tcp_memory_allocated,
2176         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2177
2178         .memory_pressure        = &tcp_memory_pressure,
2179         .orphan_count           = &tcp_orphan_count,
2180         .sysctl_mem             = sysctl_tcp_mem,
2181         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2182         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2183         .max_header             = MAX_TCP_HEADER,
2184         .obj_size               = sizeof(struct tcp6_sock),
2185         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2186         .twsk_prot              = &tcp6_timewait_sock_ops,
2187         .rsk_prot               = &tcp6_request_sock_ops,
2188         .h.hashinfo             = NULL,
2189         .no_autobind            = true,
2190         .diag_destroy           = tcp_abort,
2191 };
2192 EXPORT_SYMBOL_GPL(tcpv6_prot);
2193
2194 static const struct inet6_protocol tcpv6_protocol = {
2195         .handler        =       tcp_v6_rcv,
2196         .err_handler    =       tcp_v6_err,
2197         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2198 };
2199
2200 static struct inet_protosw tcpv6_protosw = {
2201         .type           =       SOCK_STREAM,
2202         .protocol       =       IPPROTO_TCP,
2203         .prot           =       &tcpv6_prot,
2204         .ops            =       &inet6_stream_ops,
2205         .flags          =       INET_PROTOSW_PERMANENT |
2206                                 INET_PROTOSW_ICSK,
2207 };
2208
2209 static int __net_init tcpv6_net_init(struct net *net)
2210 {
2211         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2212                                     SOCK_RAW, IPPROTO_TCP, net);
2213 }
2214
2215 static void __net_exit tcpv6_net_exit(struct net *net)
2216 {
2217         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2218 }
2219
2220 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2221 {
2222         tcp_twsk_purge(net_exit_list, AF_INET6);
2223 }
2224
2225 static struct pernet_operations tcpv6_net_ops = {
2226         .init       = tcpv6_net_init,
2227         .exit       = tcpv6_net_exit,
2228         .exit_batch = tcpv6_net_exit_batch,
2229 };
2230
2231 int __init tcpv6_init(void)
2232 {
2233         int ret;
2234
2235         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2236         if (ret)
2237                 goto out;
2238
2239         /* register inet6 protocol */
2240         ret = inet6_register_protosw(&tcpv6_protosw);
2241         if (ret)
2242                 goto out_tcpv6_protocol;
2243
2244         ret = register_pernet_subsys(&tcpv6_net_ops);
2245         if (ret)
2246                 goto out_tcpv6_protosw;
2247
2248         ret = mptcpv6_init();
2249         if (ret)
2250                 goto out_tcpv6_pernet_subsys;
2251
2252 out:
2253         return ret;
2254
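/* Error unwind: undo the registrations in reverse order of setup. */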
2255 out_tcpv6_pernet_subsys:
2256         unregister_pernet_subsys(&tcpv6_net_ops);
2257 out_tcpv6_protosw:
2258         inet6_unregister_protosw(&tcpv6_protosw);
2259 out_tcpv6_protocol:
2260         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2261         goto out;
2262 }
2263
2264 void tcpv6_exit(void)
2265 {
2266         unregister_pernet_subsys(&tcpv6_net_ops);
2267         inet6_unregister_protosw(&tcpv6_protosw);
2268         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2269 }