/* GNU Linux-libre 6.1.90-gnu: net/ipv6/tcp_ipv6.c */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void     tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                      struct request_sock *req);

INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
        unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

        return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

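/* Editor's note: the helper below caches the inbound dst in the socket so
 * the established fast path in tcp_v6_do_rcv() can validate and reuse it
 * without a fresh route lookup; the cookie from rt6_get_cookie() lets the
 * fast path detect a cached route that has gone stale.
 */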
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        if (dst && dst_hold_safe(dst)) {
                const struct rt6_info *rt = (const struct rt6_info *)dst;

                rcu_assign_pointer(sk->sk_rx_dst, dst);
                sk->sk_rx_dst_ifindex = skb->skb_iif;
                sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
        }
}

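/* Editor's note: initial sequence numbers and timestamp offsets are derived
 * from the connection 4-tuple by keyed hashes (secure_tcpv6_seq() and
 * secure_tcpv6_ts_off()), so they stay stable for a given flow while being
 * hard for an off-path attacker to predict.
 */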
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
        return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
                                ipv6_hdr(skb)->saddr.s6_addr32,
                                tcp_hdr(skb)->dest,
                                tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
        return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
                                   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
                              int addr_len)
{
        /* This check is replicated from tcp_v6_connect() and is intended to
         * prevent the BPF program called below from accessing bytes that are
         * out of the bounds specified by the user in addr_len.
         */
        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        sock_owned_by_me(sk);

        return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

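/* Editor's note: active open. The function below validates the destination,
 * hands the v4-mapped case off to tcp_v4_connect(), performs the flow and
 * route lookup, binds the source address, picks a local port via
 * inet6_hash_connect(), and finally sends the SYN through tcp_connect().
 */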
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct in6_addr *saddr = NULL, *final_p, final;
        struct inet_timewait_death_row *tcp_death_row;
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct net *net = sock_net(sk);
        struct ipv6_txoptions *opt;
        struct dst_entry *dst;
        struct flowi6 fl6;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return -EAFNOSUPPORT;

        memset(&fl6, 0, sizeof(fl6));

        if (np->sndflow) {
                fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl6.flowlabel);
                if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;

                        flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
                        if (IS_ERR(flowlabel))
                                return -EINVAL;
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if (ipv6_addr_any(&usin->sin6_addr)) {
                if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
                        ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
                                               &usin->sin6_addr);
                else
                        usin->sin6_addr = in6addr_loopback;
        }

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if (addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type & IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the indices
                         * must coincide.
                         */
                        if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface. */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                WRITE_ONCE(tp->write_seq, 0);
        }

        sk->sk_v6_daddr = usin->sin6_addr;
        np->flow_label = fl6.flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type & IPV6_ADDR_MAPPED) {
                u32 exthdrlen = icsk->icsk_ext_hdr_len;
                struct sockaddr_in sin;

                if (ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
                        /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
                        WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                        tp->af_specific = &tcp_sock_ipv6_specific;
#endif
                        goto failure;
                }
                np->saddr = sk->sk_v6_rcv_saddr;

                return err;
        }

        if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
                saddr = &sk->sk_v6_rcv_saddr;

        fl6.flowi6_proto = IPPROTO_TCP;
        fl6.daddr = sk->sk_v6_daddr;
        fl6.saddr = saddr ? *saddr : np->saddr;
        fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
        fl6.flowi6_oif = sk->sk_bound_dev_if;
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
        fl6.flowi6_uid = sk->sk_uid;

        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);

        security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));

        dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
        if (IS_ERR(dst)) {
                err = PTR_ERR(dst);
                goto failure;
        }

        tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

        if (!saddr) {
                saddr = &fl6.saddr;

                err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
                if (err)
                        goto failure;
        }

        /* Set the source address. */
        np->saddr = *saddr;
        inet->inet_rcv_saddr = LOOPBACK4_IPV6;

        sk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(sk, dst, NULL, NULL);

        icsk->icsk_ext_hdr_len = 0;
        if (opt)
                icsk->icsk_ext_hdr_len = opt->opt_flen +
                                         opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->inet_dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet6_hash_connect(tcp_death_row, sk);
        if (err)
                goto late_failure;

        sk_set_txhash(sk);

        if (likely(!tp->repair)) {
                if (!tp->write_seq)
                        WRITE_ONCE(tp->write_seq,
                                   secure_tcpv6_seq(np->saddr.s6_addr32,
                                                    sk->sk_v6_daddr.s6_addr32,
                                                    inet->inet_sport,
                                                    inet->inet_dport));
                tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
                                                   sk->sk_v6_daddr.s6_addr32);
        }

        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
        if (err)
                goto late_failure;

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        inet_bhash2_reset_saddr(sk);
failure:
        inet->inet_dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

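/* Editor's note: called when an ICMPV6_PKT_TOOBIG message lowered the path
 * MTU. It runs either directly from tcp_v6_err() or deferred via the
 * TCP_MTU_REDUCED_DEFERRED flag when the socket was owned by the user at
 * the time the ICMP error arrived.
 */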
static void tcp_v6_mtu_reduced(struct sock *sk)
{
        struct dst_entry *dst;
        u32 mtu;

        if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                return;

        mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

        /* Drop requests trying to increase our current mss.
         * The check done in __ip6_rt_update_pmtu() is too late.
         */
        if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
                return;

        dst = inet6_csk_update_pmtu(sk, mtu);
        if (!dst)
                return;

        if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
                tcp_sync_mss(sk, dst_mtu(dst));
                tcp_simple_retransmit(sk);
        }
}

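/* Editor's note: ICMPv6 error handler. It looks up the socket the error
 * refers to, validates the echoed sequence number against the send window,
 * and then handles redirects, PMTU updates and (possibly fatal) errors
 * according to the socket state.
 */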
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                u8 type, u8 code, int offset, __be32 info)
{
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
        const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
        struct net *net = dev_net(skb->dev);
        struct request_sock *fastopen;
        struct ipv6_pinfo *np;
        struct tcp_sock *tp;
        __u32 seq, snd_una;
        struct sock *sk;
        bool fatal;
        int err;

        sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
                                        &hdr->daddr, th->dest,
                                        &hdr->saddr, ntohs(th->source),
                                        skb->dev->ifindex, inet6_sdif(skb));

        if (!sk) {
                __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                  ICMP6_MIB_INERRORS);
                return -ENOENT;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                inet_twsk_put(inet_twsk(sk));
                return 0;
        }
        seq = ntohl(th->seq);
        fatal = icmpv6_err_convert(type, code, &err);
        if (sk->sk_state == TCP_NEW_SYN_RECV) {
                tcp_req_err(sk, seq, fatal);
                return 0;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
                __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        if (static_branch_unlikely(&ip6_min_hopcount)) {
                /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
                if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
                        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
                        goto out;
                }
        }

        tp = tcp_sk(sk);
        /* XXX (TFO) - tp->snd_una should be the ISN (see tcp_create_openreq_child()) */
        fastopen = rcu_dereference(tp->fastopen_rsk);
        snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, snd_una, tp->snd_nxt)) {
                __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = tcp_inet6_sk(sk);

        if (type == NDISC_REDIRECT) {
                if (!sock_owned_by_user(sk)) {
                        struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

                        if (dst)
                                dst->ops->redirect(dst, sk, skb);
                }
                goto out;
        }

        if (type == ICMPV6_PKT_TOOBIG) {
                u32 mtu = ntohl(info);

                /* We are not interested in TCP_LISTEN and open_requests
                 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
                 * they should go through unfragmented).
                 */
                if (sk->sk_state == TCP_LISTEN)
                        goto out;

                if (!ip6_sk_accept_pmtu(sk))
                        goto out;

                if (mtu < IPV6_MIN_MTU)
                        goto out;

                WRITE_ONCE(tp->mtu_info, mtu);

                if (!sock_owned_by_user(sk))
                        tcp_v6_mtu_reduced(sk);
                else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
                                           &sk->sk_tsq_flags))
                        sock_hold(sk);
                goto out;
        }

        /* Might be for a request_sock */
        switch (sk->sk_state) {
        case TCP_SYN_SENT:
        case TCP_SYN_RECV:
                /* Only in fast or simultaneous open. If a fast open socket is
                 * already accepted it is treated as a connected one below.
                 */
                if (fastopen && !fastopen->sk)
                        break;

                ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

                if (!sock_owned_by_user(sk)) {
                        sk->sk_err = err;
                        sk_error_report(sk);            /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        case TCP_LISTEN:
                break;
        default:
                /* Check if this ICMP message allows a revert of the backoff.
                 * (See RFC 6069.)
                 */
                if (!fastopen && type == ICMPV6_DEST_UNREACH &&
                    code == ICMPV6_NOROUTE)
                        tcp_ld_RTO_revert(sk, seq);
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
        return 0;
}

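/* Editor's note: build and send a SYN-ACK for a (not yet accepted) request
 * socket. The traffic class may reflect the ECN-masked TOS of the incoming
 * SYN when the tcp_reflect_tos sysctl is enabled.
 */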
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
                              struct tcp_fastopen_cookie *foc,
                              enum tcp_synack_type synack_type,
                              struct sk_buff *syn_skb)
{
        struct inet_request_sock *ireq = inet_rsk(req);
        struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct flowi6 *fl6 = &fl->u.ip6;
        struct sk_buff *skb;
        int err = -ENOMEM;
        u8 tclass;

        /* First, grab a route. */
        if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
                                               IPPROTO_TCP)) == NULL)
                goto done;

        skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

        if (skb) {
                __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
                                    &ireq->ir_v6_rmt_addr);

                fl6->daddr = ireq->ir_v6_rmt_addr;
                if (np->repflow && ireq->pktopts)
                        fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

                tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
                                (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
                                (np->tclass & INET_ECN_MASK) :
                                np->tclass;

                if (!INET_ECN_is_capable(tclass) &&
                    tcp_bpf_ca_needs_ecn((struct sock *)req))
                        tclass |= INET_ECN_ECT_0;

                rcu_read_lock();
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
                err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
                               opt, tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }

done:
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->ipv6_opt);
        consume_skb(inet_rsk(req)->pktopts);
}

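/* Editor's note: TCP MD5 signature (RFC 2385) support. Keys are looked up
 * per remote address, optionally scoped to an L3 master device, and are
 * configured from user space via the TCP_MD5SIG/TCP_MD5SIG_EXT socket
 * options handled by tcp_v6_parse_md5_keys() below.
 */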
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
                                                   const struct in6_addr *addr,
                                                   int l3index)
{
        return tcp_md5_do_lookup(sk, l3index,
                                 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
                                                const struct sock *addr_sk)
{
        int l3index;

        l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
                                                 addr_sk->sk_bound_dev_if);
        return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
                                    l3index);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
                                 sockptr_t optval, int optlen)
{
        struct tcp_md5sig cmd;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
        int l3index = 0;
        u8 prefixlen;
        u8 flags;

        if (optlen < sizeof(cmd))
                return -EINVAL;

        if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
                return -EFAULT;

        if (sin6->sin6_family != AF_INET6)
                return -EINVAL;

        flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

        if (optname == TCP_MD5SIG_EXT &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
                prefixlen = cmd.tcpm_prefixlen;
                if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
                                        prefixlen > 32))
                        return -EINVAL;
        } else {
                prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
        }

        if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
            cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
                struct net_device *dev;

                rcu_read_lock();
                dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
                if (dev && netif_is_l3_master(dev))
                        l3index = dev->ifindex;
                rcu_read_unlock();

                /* OK to reference set/not set outside of RCU;
                 * right now the device MUST be an L3 master.
                 */
                if (!dev || !l3index)
                        return -EINVAL;
        }

        if (!cmd.tcpm_keylen) {
                if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                        return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                              AF_INET, prefixlen,
                                              l3index, flags);
                return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                                      AF_INET6, prefixlen, l3index, flags);
        }

        if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
                return -EINVAL;

        if (ipv6_addr_v4mapped(&sin6->sin6_addr))
                return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
                                      AF_INET, prefixlen, l3index, flags,
                                      cmd.tcpm_key, cmd.tcpm_keylen,
                                      GFP_KERNEL);

        return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
                              AF_INET6, prefixlen, l3index, flags,
                              cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

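/* Editor's note: RFC 2385 requires the MD5 digest to cover the IPv6
 * pseudo-header, the TCP header with a zeroed checksum field, the payload
 * and finally the key. The helper below feeds the first two pieces into
 * the transform; the callers hash the payload and key afterwards.
 */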
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
                                   const struct in6_addr *daddr,
                                   const struct in6_addr *saddr,
                                   const struct tcphdr *th, int nbytes)
{
        struct tcp6_pseudohdr *bp;
        struct scatterlist sg;
        struct tcphdr *_th;

        bp = hp->scratch;
        /* 1. TCP pseudo-header (RFC 2460) */
        bp->saddr = *saddr;
        bp->daddr = *daddr;
        bp->protocol = cpu_to_be32(IPPROTO_TCP);
        bp->len = cpu_to_be32(nbytes);

        _th = (struct tcphdr *)(bp + 1);
        memcpy(_th, th, sizeof(*th));
        _th->check = 0;

        sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
        ahash_request_set_crypt(hp->md5_req, &sg, NULL,
                                sizeof(*bp) + sizeof(*th));
        return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
                               const struct in6_addr *daddr, struct in6_addr *saddr,
                               const struct tcphdr *th)
{
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;
        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
                               const struct tcp_md5sig_key *key,
                               const struct sock *sk,
                               const struct sk_buff *skb)
{
        const struct in6_addr *saddr, *daddr;
        struct tcp_md5sig_pool *hp;
        struct ahash_request *req;
        const struct tcphdr *th = tcp_hdr(skb);

        if (sk) { /* valid for established/request sockets */
                saddr = &sk->sk_v6_rcv_saddr;
                daddr = &sk->sk_v6_daddr;
        } else {
                const struct ipv6hdr *ip6h = ipv6_hdr(skb);

                saddr = &ip6h->saddr;
                daddr = &ip6h->daddr;
        }

        hp = tcp_get_md5sig_pool();
        if (!hp)
                goto clear_hash_noput;
        req = hp->md5_req;

        if (crypto_ahash_init(req))
                goto clear_hash;

        if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
                goto clear_hash;
        if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
                goto clear_hash;
        if (tcp_md5_hash_key(hp, key))
                goto clear_hash;
        ahash_request_set_crypt(req, NULL, md5_hash, 0);
        if (crypto_ahash_final(req))
                goto clear_hash;

        tcp_put_md5sig_pool();
        return 0;

clear_hash:
        tcp_put_md5sig_pool();
clear_hash_noput:
        memset(md5_hash, 0, 16);
        return 1;
}

#endif

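/* Editor's note: record the addresses of the incoming SYN in the request
 * sock, pin the inbound interface for link-local peers, and keep a
 * reference to the SYN skb when the listener asked for packet options
 * (IPV6_PKTOPTIONS and friends).
 */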
static void tcp_v6_init_req(struct request_sock *req,
                            const struct sock *sk_listener,
                            struct sk_buff *skb)
{
        bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
        struct inet_request_sock *ireq = inet_rsk(req);
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

        ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
        ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

        /* So that link-local addresses have meaning. */
        if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
            ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
                ireq->ir_iif = tcp_v6_iif(skb);

        if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
            (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
             np->rxopt.bits.rxinfo ||
             np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
             np->rxopt.bits.rxohlim || np->repflow)) {
                refcount_inc(&skb->users);
                ireq->pktopts = skb;
        }
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct flowi *fl,
                                          struct request_sock *req)
{
        tcp_v6_init_req(req, sk, skb);

        if (security_inet_conn_request(sk, skb, req))
                return NULL;

        return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_rtx_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset,
        .syn_ack_timeout =      tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
        .mss_clamp      =       IPV6_MIN_MTU - sizeof(struct tcphdr) -
                                sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
        .req_md5_lookup =       tcp_v6_md5_lookup,
        .calc_md5_hash  =       tcp_v6_md5_hash_skb,
#endif
#ifdef CONFIG_SYN_COOKIES
        .cookie_init_seq =      cookie_v6_init_sequence,
#endif
        .route_req      =       tcp_v6_route_req,
        .init_seq       =       tcp_v6_init_seq,
        .init_ts_off    =       tcp_v6_init_ts_off,
        .send_synack    =       tcp_v6_send_synack,
};

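/* Editor's note: common helper to emit a bare ACK or RST in reply to a
 * received segment. The reply is built on a fresh skb and sent through the
 * per-netns control socket, so it works even when no full socket exists
 * for the flow.
 */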
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
                                 u32 ack, u32 win, u32 tsval, u32 tsecr,
                                 int oif, struct tcp_md5sig_key *key, int rst,
                                 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct tcphdr *t1;
        struct sk_buff *buff;
        struct flowi6 fl6;
        struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
        struct sock *ctl_sk = net->ipv6.tcp_sk;
        unsigned int tot_len = sizeof(struct tcphdr);
        __be32 mrst = 0, *topt;
        struct dst_entry *dst;
        __u32 mark = 0;

        if (tsecr)
                tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
        if (key)
                tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
        if (rst && !key) {
                mrst = mptcp_reset_option(skb);

                if (mrst)
                        tot_len += sizeof(__be32);
        }
#endif

        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
        if (!buff)
                return;

        skb_reserve(buff, MAX_TCP_HEADER);

        t1 = skb_push(buff, tot_len);
        skb_reset_transport_header(buff);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len / 4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = !rst || !th->ack;
        t1->rst = rst;
        t1->window = htons(win);

        topt = (__be32 *)(t1 + 1);

        if (tsecr) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *topt++ = htonl(tsval);
                *topt++ = htonl(tsecr);
        }

        if (mrst)
                *topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
        if (key) {
                *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                                (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
                tcp_v6_md5_hash_hdr((__u8 *)topt, key,
                                    &ipv6_hdr(skb)->saddr,
                                    &ipv6_hdr(skb)->daddr, t1);
        }
#endif

        memset(&fl6, 0, sizeof(fl6));
        fl6.daddr = ipv6_hdr(skb)->saddr;
        fl6.saddr = ipv6_hdr(skb)->daddr;
        fl6.flowlabel = label;

        buff->ip_summed = CHECKSUM_PARTIAL;

        __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
        else {
                if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
                        oif = skb->skb_iif;

                fl6.flowi6_oif = oif;
        }

        if (sk) {
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
                        mark = READ_ONCE(sk->sk_mark);
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
                /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
                skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
        }
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
        fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

        /* Pass a socket to ip6_dst_lookup_flow even if it is for an RST;
         * the underlying function will use it to retrieve the network
         * namespace.
         */
        if (sk && sk->sk_state != TCP_TIME_WAIT)
                dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /* sk's xfrm_policy can be referred to */
        else
                dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
        if (!IS_ERR(dst)) {
                skb_dst_set(buff, dst);
                ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
                         tclass & ~INET_ECN_MASK, priority);
                TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
                if (rst)
                        TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

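/* Editor's note: send a RST in reply to skb, following the RFC 793 rules
 * visible below: if the incoming segment carried an ACK, the RST reuses
 * that ACK value as its sequence number; otherwise the RST acknowledges
 * everything the offending segment occupied.
 */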
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
        const struct tcphdr *th = tcp_hdr(skb);
        struct ipv6hdr *ipv6h = ipv6_hdr(skb);
        u32 seq = 0, ack_seq = 0;
        struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
        const __u8 *hash_location = NULL;
        unsigned char newhash[16];
        int genhash;
        struct sock *sk1 = NULL;
#endif
        __be32 label = 0;
        u32 priority = 0;
        struct net *net;
        u32 txhash = 0;
        int oif = 0;

        if (th->rst)
                return;

        /* If sk is not NULL, it means we did a successful lookup and the
         * incoming route had to be correct. prequeue might have dropped our dst.
         */
        if (!sk && !ipv6_unicast_destination(skb))
                return;

        net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
        rcu_read_lock();
        hash_location = tcp_parse_md5sig_option(th);
        if (sk && sk_fullsock(sk)) {
                int l3index;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and inet_iif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
                key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
        } else if (hash_location) {
                int dif = tcp_v6_iif_l3_slave(skb);
                int sdif = tcp_v6_sdif(skb);
                int l3index;

                /*
                 * The active side is lost. Try to find the listening socket
                 * through the source port, and then find the md5 key through
                 * the listening socket. We do not lose security here:
                 * the incoming packet is checked with the md5 hash of the
                 * found key; no RST is generated if the md5 hash doesn't match.
                 */
                sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
                                            NULL, 0, &ipv6h->saddr, th->source,
                                            &ipv6h->daddr, ntohs(th->source),
                                            dif, sdif);
                if (!sk1)
                        goto out;

                /* sdif set means the packet ingressed via a device
                 * in an L3 domain and dif is set to it.
                 */
                l3index = tcp_v6_sdif(skb) ? dif : 0;

                key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
                if (!key)
                        goto out;

                genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
                if (genhash || memcmp(hash_location, newhash, 16) != 0)
                        goto out;
        }
#endif

        if (th->ack)
                seq = ntohl(th->ack_seq);
        else
                ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
                          (th->doff << 2);

        if (sk) {
                oif = sk->sk_bound_dev_if;
                if (sk_fullsock(sk)) {
                        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

                        trace_tcp_send_reset(sk, skb);
                        if (np->repflow)
                                label = ip6_flowlabel(ipv6h);
                        priority = sk->sk_priority;
                        txhash = sk->sk_txhash;
                }
                if (sk->sk_state == TCP_TIME_WAIT) {
                        label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
                        priority = inet_twsk(sk)->tw_priority;
                        txhash = inet_twsk(sk)->tw_txhash;
                }
        } else {
                if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
                        label = ip6_flowlabel(ipv6h);
        }

        tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
                             ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
        rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
                            u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key, u8 tclass,
                            __be32 label, u32 priority, u32 txhash)
{
        tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
                             tclass, label, priority, txhash);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

        tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp_raw() + tcptw->tw_ts_offset,
                        tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
                        tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
                        tw->tw_txhash);

        inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req)
{
        int l3index;

        l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

        /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
         * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
         */
        /* RFC 7323 2.3
         * The window field (SEG.WND) of every outgoing segment, with the
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
        tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
                        READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
                        ipv6_get_dsfield(ipv6_hdr(skb)), 0,
                        READ_ONCE(sk->sk_priority),
                        READ_ONCE(tcp_rsk(req)->txhash));
}

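/* Editor's note: when SYN cookies are in use, a non-SYN segment hitting a
 * listener may actually complete a cookie handshake. cookie_v6_check()
 * either creates a child socket, returns the listener unchanged, or fails
 * (NULL), in which case the caller discards the segment.
 */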
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);

        if (!th->syn)
                sk = cookie_v6_check(sk, skb);
#endif
        return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
                         struct tcphdr *th, u32 *cookie)
{
        u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
        mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
                                    &tcp_request_sock_ipv6_ops, sk, th);
        if (mss) {
                *cookie = __cookie_v6_init_sequence(iph, th, &mss);
                tcp_synq_overflow(sk);
        }
#endif
        return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
                __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
                return 0;
        }

        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);

drop:
        tcp_listendrop(sk);
        return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
        /* We need to move the header back to the beginning if xfrm6_policy_check()
         * and tcp_v6_fill_cb() are going to be called again.
         * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
         */
        memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
                sizeof(struct inet6_skb_parm));
}

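/* Editor's note: create the full (child) socket once the 3WHS completes.
 * The v4-mapped branch delegates to tcp_v4_syn_recv_sock() and then
 * retrofits the IPv6 control block; the native branch routes, allocates
 * and initializes the child before inheriting the port and entering the
 * established hash.
 */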
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                         struct request_sock *req,
                                         struct dst_entry *dst,
                                         struct request_sock *req_unhash,
                                         bool *own_req)
{
        struct inet_request_sock *ireq;
        struct ipv6_pinfo *newnp;
        const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
        struct ipv6_txoptions *opt;
        struct inet_sock *newinet;
        bool found_dup_sk = false;
        struct tcp_sock *newtp;
        struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
        struct tcp_md5sig_key *key;
        int l3index;
#endif
        struct flowi6 fl6;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
                                             req_unhash, own_req);

                if (!newsk)
                        return NULL;

                inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

                newnp = tcp_inet6_sk(newsk);
                newtp = tcp_sk(newsk);

                memcpy(newnp, np, sizeof(struct ipv6_pinfo));

                newnp->saddr = newsk->sk_v6_rcv_saddr;

                inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
                if (sk_is_mptcp(newsk))
                        mptcpv6_handle_mapped(newsk, true);
                newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
                newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

                newnp->ipv6_mc_list = NULL;
                newnp->ipv6_ac_list = NULL;
                newnp->ipv6_fl_list = NULL;
                newnp->pktoptions  = NULL;
                newnp->opt         = NULL;
                newnp->mcast_oif   = inet_iif(skb);
                newnp->mcast_hops  = ip_hdr(skb)->ttl;
                newnp->rcv_flowinfo = 0;
                if (np->repflow)
                        newnp->flow_label = 0;

                /*
                 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
                 * here, tcp_create_openreq_child now does this for us, see the comment in
                 * that function for the gory details. -acme
                 */

                /* This is a tricky place. Until this moment the IPv4 tcp code
                 * worked with the IPv6 icsk.icsk_af_ops.
                 * Sync it now.
                 */
                tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

                return newsk;
        }

        ireq = inet_rsk(req);

        if (sk_acceptq_is_full(sk))
                goto out_overflow;

        if (!dst) {
                dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
                if (!dst)
                        goto out;
        }

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (!newsk)
                goto out_nonewsk;

        /*
         * No need to charge this sock to the relevant IPv6 refcnt debug socks
         * count here, tcp_create_openreq_child now does this for us, see the
         * comment in that function for the gory details. -acme
         */

        newsk->sk_gso_type = SKB_GSO_TCPV6;
        ip6_dst_store(newsk, dst, NULL, NULL);
        inet6_sk_rx_dst_set(newsk, skb);

        inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

        newtp = tcp_sk(newsk);
        newinet = inet_sk(newsk);
        newnp = tcp_inet6_sk(newsk);

        memcpy(newnp, np, sizeof(struct ipv6_pinfo));

        newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
        newnp->saddr = ireq->ir_v6_loc_addr;
        newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
        newsk->sk_bound_dev_if = ireq->ir_iif;

        /* Now the IPv6 options...
         *
         * First: no IPv4 options.
         */
        newinet->inet_opt = NULL;
        newnp->ipv6_mc_list = NULL;
        newnp->ipv6_ac_list = NULL;
        newnp->ipv6_fl_list = NULL;

        /* Clone RX bits */
        newnp->rxopt.all = np->rxopt.all;

        newnp->pktoptions = NULL;
        newnp->opt        = NULL;
        newnp->mcast_oif  = tcp_v6_iif(skb);
        newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
        newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
        if (np->repflow)
                newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

        /* Set the TOS of the new socket based upon the value of the incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
         */
        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
                newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

        /* Clone native IPv6 options from the listening socket (if any).
         *
         * Yes, keeping a reference count would be much more clever, but we do
         * one more thing here: reattach optmem to newsk.
         */
        opt = ireq->ipv6_opt;
        if (!opt)
                opt = rcu_dereference(np->opt);
        if (opt) {
                opt = ipv6_dup_options(newsk, opt);
                RCU_INIT_POINTER(newnp->opt, opt);
        }
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (opt)
                inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
                                                    opt->opt_flen;

        tcp_ca_openreq_child(newsk, dst);

        tcp_sync_mss(newsk, dst_mtu(dst));
        newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

        tcp_initialize_rcv_mss(newsk);

        newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
        newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
        l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

        /* Copy over the MD5 key from the original socket */
        key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
        if (key) {
                /* We're using one, so create a matching key
                 * on the newsk structure. If we fail to get
                 * memory, then we end up not copying the key
                 * across. Shucks.
                 */
                tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
                               AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
                               sk_gfp_mask(sk, GFP_ATOMIC));
        }
#endif

        if (__inet_inherit_port(sk, newsk) < 0) {
                inet_csk_prepare_forced_close(newsk);
                tcp_done(newsk);
                goto out;
        }
        *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
                                       &found_dup_sk);
        if (*own_req) {
                tcp_move_syn(newtp, req);

                /* Clone pktoptions received with SYN, if we own the req */
                if (ireq->pktopts) {
                        newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
                        consume_skb(ireq->pktopts);
                        ireq->pktopts = NULL;
                        if (newnp->pktoptions)
                                tcp_v6_restore_cb(newnp->pktoptions);
                }
        } else {
                if (!req_unhash && found_dup_sk) {
                        /* This code path should only be executed in the
                         * syncookie case.
                         */
                        bh_unlock_sock(newsk);
                        sock_put(newsk);
                        newsk = NULL;
                }
        }

        return newsk;

out_overflow:
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
        dst_release(dst);
out:
        tcp_listendrop(sk);
        return NULL;
}

1421 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1422                                                            u32));
1423 /* The socket must have it's spinlock held when we get
1424  * here, unless it is a TCP_LISTEN socket.
1425  *
1426  * We have a potential double-lock case here, so even when
1427  * doing backlog processing we use the BH locking scheme.
1428  * This is because we cannot sleep with the original spinlock
1429  * held.
1430  */
1431 INDIRECT_CALLABLE_SCOPE
1432 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1433 {
1434         struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1435         struct sk_buff *opt_skb = NULL;
1436         enum skb_drop_reason reason;
1437         struct tcp_sock *tp;
1438
1439         /* Imagine: socket is IPv6. IPv4 packet arrives,
1440            goes to IPv4 receive handler and backlogged.
1441            From backlog it always goes here. Kerboom...
1442            Fortunately, tcp_rcv_established and rcv_established
1443            handle them correctly, but it is not case with
1444            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1445          */
1446
1447         if (skb->protocol == htons(ETH_P_IP))
1448                 return tcp_v4_do_rcv(sk, skb);
1449
1450         /*
1451          *      socket locking is here for SMP purposes as backlog rcv
1452          *      is currently called with bh processing disabled.
1453          */
1454
1455         /* Do Stevens' IPV6_PKTOPTIONS.
1456
1457            Yes, guys, this is the only place in our code where we
1458            can do it without affecting IPv4.
1459            The rest of the code is protocol independent,
1460            and I do not like the idea of uglifying IPv4.
1461
1462            Actually, the whole idea behind IPV6_PKTOPTIONS
1463            does not look very well thought out. For now we latch
1464            the options received in the last packet enqueued
1465            by tcp. Feel free to propose a better solution.
1466                                                --ANK (980728)
1467          */
1468         if (np->rxopt.all)
1469                 opt_skb = skb_clone_and_charge_r(skb, sk);
1470
1471         reason = SKB_DROP_REASON_NOT_SPECIFIED;
1472         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1473                 struct dst_entry *dst;
1474
1475                 dst = rcu_dereference_protected(sk->sk_rx_dst,
1476                                                 lockdep_sock_is_held(sk));
1477
1478                 sock_rps_save_rxhash(sk, skb);
1479                 sk_mark_napi_id(sk, skb);
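                /* Validate the dst cached on the socket: drop it if the
                 * incoming interface changed or the route was invalidated.
                 * INDIRECT_CALL_1() avoids a retpoline for the common
                 * ip6_dst_check() case.
                 */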
1480                 if (dst) {
1481                         if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
1482                             INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1483                                             dst, sk->sk_rx_dst_cookie) == NULL) {
1484                                 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1485                                 dst_release(dst);
1486                         }
1487                 }
1488
1489                 tcp_rcv_established(sk, skb);
1490                 if (opt_skb)
1491                         goto ipv6_pktoptions;
1492                 return 0;
1493         }
1494
1495         if (tcp_checksum_complete(skb))
1496                 goto csum_err;
1497
1498         if (sk->sk_state == TCP_LISTEN) {
1499                 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1500
1501                 if (!nsk)
1502                         goto discard;
1503
1504                 if (nsk != sk) {
1505                         if (tcp_child_process(sk, nsk, skb))
1506                                 goto reset;
1507                         if (opt_skb)
1508                                 __kfree_skb(opt_skb);
1509                         return 0;
1510                 }
1511         } else
1512                 sock_rps_save_rxhash(sk, skb);
1513
1514         if (tcp_rcv_state_process(sk, skb))
1515                 goto reset;
1516         if (opt_skb)
1517                 goto ipv6_pktoptions;
1518         return 0;
1519
1520 reset:
1521         tcp_v6_send_reset(sk, skb);
1522 discard:
1523         if (opt_skb)
1524                 __kfree_skb(opt_skb);
1525         kfree_skb_reason(skb, reason);
1526         return 0;
1527 csum_err:
1528         reason = SKB_DROP_REASON_TCP_CSUM;
1529         trace_tcp_bad_csum(skb);
1530         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1531         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1532         goto discard;
1533
1534
1535 ipv6_pktoptions:
1536         /* You may ask, what does this check mean?
1537
1538            1. skb was enqueued by tcp.
1539            2. skb was added to the tail of the read queue, not out of order.
1540            3. The socket is not in a passive state.
1541            4. Finally, it really contains options that the user wants to receive.
1542          */
1543         tp = tcp_sk(sk);
1544         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1545             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1546                 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1547                         np->mcast_oif = tcp_v6_iif(opt_skb);
1548                 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1549                         np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1550                 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1551                         np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1552                 if (np->repflow)
1553                         np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1554                 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1555                         tcp_v6_restore_cb(opt_skb);
1556                         opt_skb = xchg(&np->pktoptions, opt_skb);
1557                 } else {
1558                         __kfree_skb(opt_skb);
1559                         opt_skb = xchg(&np->pktoptions, NULL);
1560                 }
1561         }
1562
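        /* At this point opt_skb holds either the unneeded clone or whatever
         * was previously latched in np->pktoptions; either way release that
         * reference (consume_skb() copes with NULL).
         */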
1563         consume_skb(opt_skb);
1564         return 0;
1565 }
1566
1567 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1568                            const struct tcphdr *th)
1569 {
1570         /* This is tricky: we move IP6CB to its correct location inside
1571          * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1572          * _decode_session6() uses IP6CB().
1573          * barrier() makes sure the compiler won't play aliasing games.
1574          */
1575         memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1576                 sizeof(struct inet6_skb_parm));
1577         barrier();
1578
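        /* Sequence-space accounting below: SYN and FIN each consume one
         * unit of sequence space, hence the th->syn + th->fin terms.
         */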
1579         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1580         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1581                                     skb->len - th->doff*4);
1582         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1583         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1584         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1585         TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1586         TCP_SKB_CB(skb)->sacked = 0;
1587         TCP_SKB_CB(skb)->has_rxtstamp =
1588                         skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1589 }
1590
1591 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1592 {
1593         enum skb_drop_reason drop_reason;
1594         int sdif = inet6_sdif(skb);
1595         int dif = inet6_iif(skb);
1596         const struct tcphdr *th;
1597         const struct ipv6hdr *hdr;
1598         bool refcounted;
1599         struct sock *sk;
1600         int ret;
1601         struct net *net = dev_net(skb->dev);
1602
1603         drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
1604         if (skb->pkt_type != PACKET_HOST)
1605                 goto discard_it;
1606
1607         /*
1608          *      Count it even if it's bad.
1609          */
1610         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1611
1612         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1613                 goto discard_it;
1614
1615         th = (const struct tcphdr *)skb->data;
1616
1617         if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
1618                 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
1619                 goto bad_packet;
1620         }
1621         if (!pskb_may_pull(skb, th->doff*4))
1622                 goto discard_it;
1623
1624         if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1625                 goto csum_error;
1626
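        /* Reload the header pointers: pskb_may_pull() above may have
         * reallocated the skb head.
         */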
1627         th = (const struct tcphdr *)skb->data;
1628         hdr = ipv6_hdr(skb);
1629
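        /* The TCP hash tables are shared between IPv4 and IPv6, which is
         * why the lookup below goes through net->ipv4.tcp_death_row even
         * on the IPv6 path.
         */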
1630 lookup:
1631         sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
1632                                 th->source, th->dest, inet6_iif(skb), sdif,
1633                                 &refcounted);
1634         if (!sk)
1635                 goto no_tcp_socket;
1636
1637 process:
1638         if (sk->sk_state == TCP_TIME_WAIT)
1639                 goto do_time_wait;
1640
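        /* TCP_NEW_SYN_RECV: the skb matched a request socket, i.e. a
         * handshake in progress. Validate it against the listener, then
         * let tcp_check_req() either promote the request to a full child
         * socket or drop it.
         */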
1641         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1642                 struct request_sock *req = inet_reqsk(sk);
1643                 bool req_stolen = false;
1644                 struct sock *nsk;
1645
1646                 sk = req->rsk_listener;
1647                 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1648                         drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1649                 else
1650                         drop_reason = tcp_inbound_md5_hash(sk, skb,
1651                                                            &hdr->saddr, &hdr->daddr,
1652                                                            AF_INET6, dif, sdif);
1653                 if (drop_reason) {
1654                         sk_drops_add(sk, skb);
1655                         reqsk_put(req);
1656                         goto discard_it;
1657                 }
1658                 if (tcp_checksum_complete(skb)) {
1659                         reqsk_put(req);
1660                         goto csum_error;
1661                 }
1662                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1663                         nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
1664                         if (!nsk) {
1665                                 inet_csk_reqsk_queue_drop_and_put(sk, req);
1666                                 goto lookup;
1667                         }
1668                         sk = nsk;
1669                         /* reuseport_migrate_sock() has already taken one
1670                          * sk_refcnt reference before returning.
1671                          */
1672                 } else {
1673                         sock_hold(sk);
1674                 }
1675                 refcounted = true;
1676                 nsk = NULL;
1677                 if (!tcp_filter(sk, skb)) {
1678                         th = (const struct tcphdr *)skb->data;
1679                         hdr = ipv6_hdr(skb);
1680                         tcp_v6_fill_cb(skb, hdr, th);
1681                         nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1682                 } else {
1683                         drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1684                 }
1685                 if (!nsk) {
1686                         reqsk_put(req);
1687                         if (req_stolen) {
1688                         /* Another CPU got exclusive access to req
1689                          * and created a full-blown socket.
1690                          * Try to feed this packet to that socket
1691                          * instead of discarding it.
1692                          */
1693                                 tcp_v6_restore_cb(skb);
1694                                 sock_put(sk);
1695                                 goto lookup;
1696                         }
1697                         goto discard_and_relse;
1698                 }
1699                 nf_reset_ct(skb);
1700                 if (nsk == sk) {
1701                         reqsk_put(req);
1702                         tcp_v6_restore_cb(skb);
1703                 } else if (tcp_child_process(sk, nsk, skb)) {
1704                         tcp_v6_send_reset(nsk, skb);
1705                         goto discard_and_relse;
1706                 } else {
1707                         sock_put(sk);
1708                         return 0;
1709                 }
1710         }
1711
1712         if (static_branch_unlikely(&ip6_min_hopcount)) {
1713                 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
1714                 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
1715                         __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1716                         goto discard_and_relse;
1717                 }
1718         }
1719
1720         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
1721                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1722                 goto discard_and_relse;
1723         }
1724
1725         drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
1726                                            AF_INET6, dif, sdif);
1727         if (drop_reason)
1728                 goto discard_and_relse;
1729
1730         nf_reset_ct(skb);
1731
1732         if (tcp_filter(sk, skb)) {
1733                 drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1734                 goto discard_and_relse;
1735         }
1736         th = (const struct tcphdr *)skb->data;
1737         hdr = ipv6_hdr(skb);
1738         tcp_v6_fill_cb(skb, hdr, th);
1739
1740         skb->dev = NULL;
1741
1742         if (sk->sk_state == TCP_LISTEN) {
1743                 ret = tcp_v6_do_rcv(sk, skb);
1744                 goto put_and_return;
1745         }
1746
1747         sk_incoming_cpu_update(sk);
1748
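        /* If the socket is owned by a user context, queue the skb on the
         * backlog; it is replayed through tcp_v6_do_rcv() when the owner
         * releases the socket.
         */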
1749         bh_lock_sock_nested(sk);
1750         tcp_segs_in(tcp_sk(sk), skb);
1751         ret = 0;
1752         if (!sock_owned_by_user(sk)) {
1753                 ret = tcp_v6_do_rcv(sk, skb);
1754         } else {
1755                 if (tcp_add_backlog(sk, skb, &drop_reason))
1756                         goto discard_and_relse;
1757         }
1758         bh_unlock_sock(sk);
1759 put_and_return:
1760         if (refcounted)
1761                 sock_put(sk);
1762         return ret ? -1 : 0;
1763
1764 no_tcp_socket:
1765         drop_reason = SKB_DROP_REASON_NO_SOCKET;
1766         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1767                 goto discard_it;
1768
1769         tcp_v6_fill_cb(skb, hdr, th);
1770
1771         if (tcp_checksum_complete(skb)) {
1772 csum_error:
1773                 drop_reason = SKB_DROP_REASON_TCP_CSUM;
1774                 trace_tcp_bad_csum(skb);
1775                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1776 bad_packet:
1777                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1778         } else {
1779                 tcp_v6_send_reset(NULL, skb);
1780         }
1781
1782 discard_it:
1783         SKB_DR_OR(drop_reason, NOT_SPECIFIED);
1784         kfree_skb_reason(skb, drop_reason);
1785         return 0;
1786
1787 discard_and_relse:
1788         sk_drops_add(sk, skb);
1789         if (refcounted)
1790                 sock_put(sk);
1791         goto discard_it;
1792
1793 do_time_wait:
1794         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1795                 drop_reason = SKB_DROP_REASON_XFRM_POLICY;
1796                 inet_twsk_put(inet_twsk(sk));
1797                 goto discard_it;
1798         }
1799
1800         tcp_v6_fill_cb(skb, hdr, th);
1801
1802         if (tcp_checksum_complete(skb)) {
1803                 inet_twsk_put(inet_twsk(sk));
1804                 goto csum_error;
1805         }
1806
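        /* tcp_timewait_state_process() decides the fate of the segment:
         * a valid new SYN may reopen the pair (TCP_TW_SYN); otherwise we
         * ACK, send a RST, or silently succeed.
         */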
1807         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1808         case TCP_TW_SYN:
1809         {
1810                 struct sock *sk2;
1811
1812                 sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
1813                                             skb, __tcp_hdrlen(th),
1814                                             &ipv6_hdr(skb)->saddr, th->source,
1815                                             &ipv6_hdr(skb)->daddr,
1816                                             ntohs(th->dest),
1817                                             tcp_v6_iif_l3_slave(skb),
1818                                             sdif);
1819                 if (sk2) {
1820                         struct inet_timewait_sock *tw = inet_twsk(sk);
1821                         inet_twsk_deschedule_put(tw);
1822                         sk = sk2;
1823                         tcp_v6_restore_cb(skb);
1824                         refcounted = false;
1825                         goto process;
1826                 }
1827         }
1828                 /* to ACK */
1829                 fallthrough;
1830         case TCP_TW_ACK:
1831                 tcp_v6_timewait_ack(sk, skb);
1832                 break;
1833         case TCP_TW_RST:
1834                 tcp_v6_send_reset(sk, skb);
1835                 inet_twsk_deschedule_put(inet_twsk(sk));
1836                 goto discard_it;
1837         case TCP_TW_SUCCESS:
1838                 ;
1839         }
1840         goto discard_it;
1841 }
1842
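/* Early demux: find an established socket before the full IPv6 receive
 * path runs, so the dst cached on the socket can be attached to the skb
 * and a second socket lookup avoided.
 */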
1843 void tcp_v6_early_demux(struct sk_buff *skb)
1844 {
1845         struct net *net = dev_net(skb->dev);
1846         const struct ipv6hdr *hdr;
1847         const struct tcphdr *th;
1848         struct sock *sk;
1849
1850         if (skb->pkt_type != PACKET_HOST)
1851                 return;
1852
1853         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1854                 return;
1855
1856         hdr = ipv6_hdr(skb);
1857         th = tcp_hdr(skb);
1858
1859         if (th->doff < sizeof(struct tcphdr) / 4)
1860                 return;
1861
1862         /* Note: we use inet6_iif() here, not tcp_v6_iif() */
1863         sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1864                                         &hdr->saddr, th->source,
1865                                         &hdr->daddr, ntohs(th->dest),
1866                                         inet6_iif(skb), inet6_sdif(skb));
1867         if (sk) {
1868                 skb->sk = sk;
1869                 skb->destructor = sock_edemux;
1870                 if (sk_fullsock(sk)) {
1871                         struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1872
1873                         if (dst)
1874                                 dst = dst_check(dst, sk->sk_rx_dst_cookie);
1875                         if (dst &&
1876                             sk->sk_rx_dst_ifindex == skb->skb_iif)
1877                                 skb_dst_set_noref(skb, dst);
1878                 }
1879         }
1880 }
1881
1882 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1883         .twsk_obj_size  = sizeof(struct tcp6_timewait_sock),
1884         .twsk_unique    = tcp_twsk_unique,
1885         .twsk_destructor = tcp_twsk_destructor,
1886 };
1887
1888 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1889 {
1890         __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1891 }
1892
1893 const struct inet_connection_sock_af_ops ipv6_specific = {
1894         .queue_xmit        = inet6_csk_xmit,
1895         .send_check        = tcp_v6_send_check,
1896         .rebuild_header    = inet6_sk_rebuild_header,
1897         .sk_rx_dst_set     = inet6_sk_rx_dst_set,
1898         .conn_request      = tcp_v6_conn_request,
1899         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1900         .net_header_len    = sizeof(struct ipv6hdr),
1901         .net_frag_header_len = sizeof(struct frag_hdr),
1902         .setsockopt        = ipv6_setsockopt,
1903         .getsockopt        = ipv6_getsockopt,
1904         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1905         .sockaddr_len      = sizeof(struct sockaddr_in6),
1906         .mtu_reduced       = tcp_v6_mtu_reduced,
1907 };
1908
1909 #ifdef CONFIG_TCP_MD5SIG
1910 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1911         .md5_lookup     =       tcp_v6_md5_lookup,
1912         .calc_md5_hash  =       tcp_v6_md5_hash_skb,
1913         .md5_parse      =       tcp_v6_parse_md5_keys,
1914 };
1915 #endif
1916
1917 /*
1918  *      TCP over IPv4 via INET6 API
1919  */
1920 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1921         .queue_xmit        = ip_queue_xmit,
1922         .send_check        = tcp_v4_send_check,
1923         .rebuild_header    = inet_sk_rebuild_header,
1924         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1925         .conn_request      = tcp_v6_conn_request,
1926         .syn_recv_sock     = tcp_v6_syn_recv_sock,
1927         .net_header_len    = sizeof(struct iphdr),
1928         .setsockopt        = ipv6_setsockopt,
1929         .getsockopt        = ipv6_getsockopt,
1930         .addr2sockaddr     = inet6_csk_addr2sockaddr,
1931         .sockaddr_len      = sizeof(struct sockaddr_in6),
1932         .mtu_reduced       = tcp_v4_mtu_reduced,
1933 };
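/* tcp_v6_connect() switches an AF_INET6 socket over to these ops when it
 * connects to a v4-mapped address, so the actual traffic flows through
 * the IPv4 code paths.
 */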
1934
1935 #ifdef CONFIG_TCP_MD5SIG
1936 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1937         .md5_lookup     =       tcp_v4_md5_lookup,
1938         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1939         .md5_parse      =       tcp_v6_parse_md5_keys,
1940 };
1941 #endif
1942
1943 /* NOTE: A lot of things are set to zero explicitly by the call to
1944  *       sk_alloc(), so they need not be done here.
1945  */
1946 static int tcp_v6_init_sock(struct sock *sk)
1947 {
1948         struct inet_connection_sock *icsk = inet_csk(sk);
1949
1950         tcp_init_sock(sk);
1951
1952         icsk->icsk_af_ops = &ipv6_specific;
1953
1954 #ifdef CONFIG_TCP_MD5SIG
1955         tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1956 #endif
1957
1958         return 0;
1959 }
1960
1961 #ifdef CONFIG_PROC_FS
1962 /* Proc filesystem TCPv6 sock list dumping. */
1963 static void get_openreq6(struct seq_file *seq,
1964                          const struct request_sock *req, int i)
1965 {
1966         long ttd = req->rsk_timer.expires - jiffies;
1967         const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1968         const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1969
1970         if (ttd < 0)
1971                 ttd = 0;
1972
1973         seq_printf(seq,
1974                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1975                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1976                    i,
1977                    src->s6_addr32[0], src->s6_addr32[1],
1978                    src->s6_addr32[2], src->s6_addr32[3],
1979                    inet_rsk(req)->ir_num,
1980                    dest->s6_addr32[0], dest->s6_addr32[1],
1981                    dest->s6_addr32[2], dest->s6_addr32[3],
1982                    ntohs(inet_rsk(req)->ir_rmt_port),
1983                    TCP_SYN_RECV,
1984                    0, 0, /* could print option size, but that is af dependent. */
1985                    1,   /* timers active (only the expire timer) */
1986                    jiffies_to_clock_t(ttd),
1987                    req->num_timeout,
1988                    from_kuid_munged(seq_user_ns(seq),
1989                                     sock_i_uid(req->rsk_listener)),
1990                    0,  /* non standard timer */
1991                    0, /* open_requests have no inode */
1992                    0, req);
1993 }
1994
1995 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1996 {
1997         const struct in6_addr *dest, *src;
1998         __u16 destp, srcp;
1999         int timer_active;
2000         unsigned long timer_expires;
2001         const struct inet_sock *inet = inet_sk(sp);
2002         const struct tcp_sock *tp = tcp_sk(sp);
2003         const struct inet_connection_sock *icsk = inet_csk(sp);
2004         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2005         int rx_queue;
2006         int state;
2007
2008         dest  = &sp->sk_v6_daddr;
2009         src   = &sp->sk_v6_rcv_saddr;
2010         destp = ntohs(inet->inet_dport);
2011         srcp  = ntohs(inet->inet_sport);
2012
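        /* Timer encoding matches /proc/net/tcp: 1 retransmit (or loss
         * probe / RACK reorder timer), 2 keepalive, 4 zero-window probe,
         * 0 no timer pending.
         */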
2013         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2014             icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2015             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2016                 timer_active    = 1;
2017                 timer_expires   = icsk->icsk_timeout;
2018         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2019                 timer_active    = 4;
2020                 timer_expires   = icsk->icsk_timeout;
2021         } else if (timer_pending(&sp->sk_timer)) {
2022                 timer_active    = 2;
2023                 timer_expires   = sp->sk_timer.expires;
2024         } else {
2025                 timer_active    = 0;
2026                 timer_expires = jiffies;
2027         }
2028
2029         state = inet_sk_state_load(sp);
2030         if (state == TCP_LISTEN)
2031                 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2032         else
2033                 /* Because we don't lock the socket,
2034                  * we might find a transient negative value.
2035                  */
2036                 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2037                                       READ_ONCE(tp->copied_seq), 0);
2038
2039         seq_printf(seq,
2040                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2041                    "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2042                    i,
2043                    src->s6_addr32[0], src->s6_addr32[1],
2044                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2045                    dest->s6_addr32[0], dest->s6_addr32[1],
2046                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2047                    state,
2048                    READ_ONCE(tp->write_seq) - tp->snd_una,
2049                    rx_queue,
2050                    timer_active,
2051                    jiffies_delta_to_clock_t(timer_expires - jiffies),
2052                    icsk->icsk_retransmits,
2053                    from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2054                    icsk->icsk_probes_out,
2055                    sock_i_ino(sp),
2056                    refcount_read(&sp->sk_refcnt), sp,
2057                    jiffies_to_clock_t(icsk->icsk_rto),
2058                    jiffies_to_clock_t(icsk->icsk_ack.ato),
2059                    (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2060                    tcp_snd_cwnd(tp),
2061                    state == TCP_LISTEN ?
2062                         fastopenq->max_qlen :
2063                         (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2064                    );
2065 }
2066
2067 static void get_timewait6_sock(struct seq_file *seq,
2068                                struct inet_timewait_sock *tw, int i)
2069 {
2070         long delta = tw->tw_timer.expires - jiffies;
2071         const struct in6_addr *dest, *src;
2072         __u16 destp, srcp;
2073
2074         dest = &tw->tw_v6_daddr;
2075         src  = &tw->tw_v6_rcv_saddr;
2076         destp = ntohs(tw->tw_dport);
2077         srcp  = ntohs(tw->tw_sport);
2078
2079         seq_printf(seq,
2080                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2081                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2082                    i,
2083                    src->s6_addr32[0], src->s6_addr32[1],
2084                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2085                    dest->s6_addr32[0], dest->s6_addr32[1],
2086                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2087                    tw->tw_substate, 0, 0,
2088                    3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2089                    refcount_read(&tw->tw_refcnt), tw);
2090 }
2091
2092 static int tcp6_seq_show(struct seq_file *seq, void *v)
2093 {
2094         struct tcp_iter_state *st;
2095         struct sock *sk = v;
2096
2097         if (v == SEQ_START_TOKEN) {
2098                 seq_puts(seq,
2099                          "  sl  "
2100                          "local_address                         "
2101                          "remote_address                        "
2102                          "st tx_queue rx_queue tr tm->when retrnsmt"
2103                          "   uid  timeout inode\n");
2104                 goto out;
2105         }
2106         st = seq->private;
2107
2108         if (sk->sk_state == TCP_TIME_WAIT)
2109                 get_timewait6_sock(seq, v, st->num);
2110         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2111                 get_openreq6(seq, v, st->num);
2112         else
2113                 get_tcp6_sock(seq, v, st->num);
2114 out:
2115         return 0;
2116 }
2117
2118 static const struct seq_operations tcp6_seq_ops = {
2119         .show           = tcp6_seq_show,
2120         .start          = tcp_seq_start,
2121         .next           = tcp_seq_next,
2122         .stop           = tcp_seq_stop,
2123 };
2124
2125 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2126         .family         = AF_INET6,
2127 };
2128
2129 int __net_init tcp6_proc_init(struct net *net)
2130 {
2131         if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2132                         sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2133                 return -ENOMEM;
2134         return 0;
2135 }
2136
2137 void tcp6_proc_exit(struct net *net)
2138 {
2139         remove_proc_entry("tcp6", net->proc_net);
2140 }
2141 #endif
2142
2143 struct proto tcpv6_prot = {
2144         .name                   = "TCPv6",
2145         .owner                  = THIS_MODULE,
2146         .close                  = tcp_close,
2147         .pre_connect            = tcp_v6_pre_connect,
2148         .connect                = tcp_v6_connect,
2149         .disconnect             = tcp_disconnect,
2150         .accept                 = inet_csk_accept,
2151         .ioctl                  = tcp_ioctl,
2152         .init                   = tcp_v6_init_sock,
2153         .destroy                = tcp_v4_destroy_sock,
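        /* tcp_v4_destroy_sock() is address-family independent despite
         * its name, so IPv6 sockets can use it directly.
         */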
2154         .shutdown               = tcp_shutdown,
2155         .setsockopt             = tcp_setsockopt,
2156         .getsockopt             = tcp_getsockopt,
2157         .bpf_bypass_getsockopt  = tcp_bpf_bypass_getsockopt,
2158         .keepalive              = tcp_set_keepalive,
2159         .recvmsg                = tcp_recvmsg,
2160         .sendmsg                = tcp_sendmsg,
2161         .splice_eof             = tcp_splice_eof,
2162         .sendpage               = tcp_sendpage,
2163         .backlog_rcv            = tcp_v6_do_rcv,
2164         .release_cb             = tcp_release_cb,
2165         .hash                   = inet6_hash,
2166         .unhash                 = inet_unhash,
2167         .get_port               = inet_csk_get_port,
2168         .put_port               = inet_put_port,
2169 #ifdef CONFIG_BPF_SYSCALL
2170         .psock_update_sk_prot   = tcp_bpf_update_proto,
2171 #endif
2172         .enter_memory_pressure  = tcp_enter_memory_pressure,
2173         .leave_memory_pressure  = tcp_leave_memory_pressure,
2174         .stream_memory_free     = tcp_stream_memory_free,
2175         .sockets_allocated      = &tcp_sockets_allocated,
2176
2177         .memory_allocated       = &tcp_memory_allocated,
2178         .per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
2179
2180         .memory_pressure        = &tcp_memory_pressure,
2181         .orphan_count           = &tcp_orphan_count,
2182         .sysctl_mem             = sysctl_tcp_mem,
2183         .sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2184         .sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2185         .max_header             = MAX_TCP_HEADER,
2186         .obj_size               = sizeof(struct tcp6_sock),
2187         .slab_flags             = SLAB_TYPESAFE_BY_RCU,
2188         .twsk_prot              = &tcp6_timewait_sock_ops,
2189         .rsk_prot               = &tcp6_request_sock_ops,
2190         .h.hashinfo             = NULL,
2191         .no_autobind            = true,
2192         .diag_destroy           = tcp_abort,
2193 };
2194 EXPORT_SYMBOL_GPL(tcpv6_prot);
2195
2196 static const struct inet6_protocol tcpv6_protocol = {
2197         .handler        =       tcp_v6_rcv,
2198         .err_handler    =       tcp_v6_err,
2199         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2200 };
2201
2202 static struct inet_protosw tcpv6_protosw = {
2203         .type           =       SOCK_STREAM,
2204         .protocol       =       IPPROTO_TCP,
2205         .prot           =       &tcpv6_prot,
2206         .ops            =       &inet6_stream_ops,
2207         .flags          =       INET_PROTOSW_PERMANENT |
2208                                 INET_PROTOSW_ICSK,
2209 };
2210
2211 static int __net_init tcpv6_net_init(struct net *net)
2212 {
2213         return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2214                                     SOCK_RAW, IPPROTO_TCP, net);
2215 }
2216
2217 static void __net_exit tcpv6_net_exit(struct net *net)
2218 {
2219         inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2220 }
2221
2222 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2223 {
2224         tcp_twsk_purge(net_exit_list, AF_INET6);
2225 }
2226
2227 static struct pernet_operations tcpv6_net_ops = {
2228         .init       = tcpv6_net_init,
2229         .exit       = tcpv6_net_exit,
2230         .exit_batch = tcpv6_net_exit_batch,
2231 };
2232
2233 int __init tcpv6_init(void)
2234 {
2235         int ret;
2236
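        /* Each registration below is undone in reverse order by the
         * error labels at the bottom.
         */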
2237         ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2238         if (ret)
2239                 goto out;
2240
2241         /* register inet6 protocol */
2242         ret = inet6_register_protosw(&tcpv6_protosw);
2243         if (ret)
2244                 goto out_tcpv6_protocol;
2245
2246         ret = register_pernet_subsys(&tcpv6_net_ops);
2247         if (ret)
2248                 goto out_tcpv6_protosw;
2249
2250         ret = mptcpv6_init();
2251         if (ret)
2252                 goto out_tcpv6_pernet_subsys;
2253
2254 out:
2255         return ret;
2256
2257 out_tcpv6_pernet_subsys:
2258         unregister_pernet_subsys(&tcpv6_net_ops);
2259 out_tcpv6_protosw:
2260         inet6_unregister_protosw(&tcpv6_protosw);
2261 out_tcpv6_protocol:
2262         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2263         goto out;
2264 }
2265
2266 void tcpv6_exit(void)
2267 {
2268         unregister_pernet_subsys(&tcpv6_net_ops);
2269         inet6_unregister_protosw(&tcpv6_protosw);
2270         inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2271 }