1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
// Forward declarations of handlers and ops tables that are referenced before
// their definitions later in this file.
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
// Receive handler; tcp_v6_connect() installs it as sk->sk_backlog_rcv.
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
// Address-family ops tables: ipv6_mapped is installed by tcp_v6_connect() for
// v4-mapped destinations, ipv6_specific is the table it restores afterwards.
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
// MD5 signature ops tables, declared only when CONFIG_TCP_MD5SIG is set.
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
// NOTE(review): the lines between the declarations above and the prototype
// below are missing from this listing, as is its body. Presumably this is the
// fallback tcp_v6_md5_do_lookup() for builds without MD5 support -- confirm.
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr)
90 /* Helper returning the inet6 address from a given tcp socket.
91 * It can be used in TCP stack instead of inet6_sk(sk).
92 * This avoids a dereference and allows compiler optimizations.
93 * It is a specialized version of inet6_sk_generic().
// Returns the ipv6_pinfo of a TCP socket using pointer arithmetic on sk rather
// than a stored pointer. The offset is sizeof(struct tcp6_sock) minus
// sizeof(struct ipv6_pinfo), which assumes ipv6_pinfo is the trailing member
// of struct tcp6_sock -- TODO confirm against the struct definition.
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
// Cast to u8 * so that the offset is applied in bytes.
99 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
// Records receive-path routing state from skb on the socket: the incoming
// interface index and the route cookie of the dst attached to skb.
// NOTE(review): at least one statement inside the if-body is missing from
// this listing (presumably the store of dst itself on sk) -- confirm.
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 struct dst_entry *dst = skb_dst(skb);
// Only proceed when skb has a dst and a reference to it could be taken.
106 if (dst && dst_hold_safe(dst)) {
107 const struct rt6_info *rt = (const struct rt6_info *)dst;
110 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
// The cookie is compared later by tcp_v6_do_rcv() via dst->ops->check().
111 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
// Returns an initial sequence number from secure_tcpv6_seq(), keyed on the
// addresses and ports of the received segment. The destination address of
// skb is passed first and its source address second.
// NOTE(review): one argument line is missing from this listing (presumably
// the TCP destination port) -- confirm.
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->source);
// Returns the timestamp offset from secure_tcpv6_ts_off() for the address
// pair of the received segment in namespace net. As in tcp_v6_init_seq(), the
// destination address of skb is passed first and the source address second.
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 ipv6_hdr(skb)->saddr.s6_addr32);
// Pre-connect hook: checks the address length, then runs the cgroup BPF
// INET6 connect program on (sk, uaddr) and returns its result.
// NOTE(review): the rest of the parameter list and the statement guarded by
// the length check are missing from this listing.
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
132 /* This check is replicated from tcp_v6_connect() and intended to
133 * prevent BPF program called below from accessing bytes that are out
134 * of the bound specified by user in addr_len.
136 if (addr_len < SIN6_LEN_RFC2133)
// Checks that the caller owns the socket lock.
139 sock_owned_by_me(sk);
141 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
// Active open for an IPv6 TCP socket. Checks the user-supplied sockaddr_in6,
// resolves the flow label and link-local scope, routes the flow, hashes the
// socket with inet6_hash_connect() and starts the handshake with
// tcp_connect(). V4-mapped destinations are passed on to tcp_v4_connect().
// @sk: socket being connected
// @uaddr: destination address, read as struct sockaddr_in6
// NOTE(review): short lines (braces, returns, gotos, labels and some
// declarations) are missing from this listing, so the error paths are only
// partly visible. Comments below describe the visible statements only.
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
147 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 struct inet_sock *inet = inet_sk(sk);
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 struct in6_addr *saddr = NULL, *final_p, final;
153 struct ipv6_txoptions *opt;
155 struct dst_entry *dst;
158 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
// Length check against SIN6_LEN_RFC2133, same as in tcp_v6_pre_connect().
160 if (addr_len < SIN6_LEN_RFC2133)
163 if (usin->sin6_family != AF_INET6)
164 return -EAFNOSUPPORT;
166 memset(&fl6, 0, sizeof(fl6));
// Take the flow label bits from the user-supplied flowinfo and, when a label
// is set, look it up on the socket.
169 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 IP6_ECN_flow_init(fl6.flowlabel);
171 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 struct ip6_flowlabel *flowlabel;
173 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 if (IS_ERR(flowlabel))
176 fl6_sock_release(flowlabel);
181 * connect() to INADDR_ANY means loopback (BSD'ism).
// The loopback form chosen depends on whether the socket is bound to a
// v4-mapped address.
184 if (ipv6_addr_any(&usin->sin6_addr)) {
185 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
189 usin->sin6_addr = in6addr_loopback;
192 addr_type = ipv6_addr_type(&usin->sin6_addr);
194 if (addr_type & IPV6_ADDR_MULTICAST)
// Link-local destinations: a scope id in a full-size sockaddr_in6 must be
// compatible with any bound device and then becomes the bound device.
197 if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 if (addr_len >= sizeof(struct sockaddr_in6) &&
199 usin->sin6_scope_id) {
200 /* If interface is set while binding, indices
203 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
206 sk->sk_bound_dev_if = usin->sin6_scope_id;
209 /* Connect to link-local address requires an interface */
210 if (!sk->sk_bound_dev_if)
// Cached timestamp state exists but the destination differs from the stored
// one: clear ts_recent, its stamp and the write sequence.
214 if (tp->rx_opt.ts_recent_stamp &&
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0;
218 WRITE_ONCE(tp->write_seq, 0);
221 sk->sk_v6_daddr = usin->sin6_addr;
222 np->flow_label = fl6.flowlabel;
// V4-mapped destination: build a sockaddr_in from the low 32 bits, switch
// the socket to the mapped ops tables and connect through IPv4.
228 if (addr_type & IPV6_ADDR_MAPPED) {
229 u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 struct sockaddr_in sin;
232 if (__ipv6_only_sock(sk))
235 sin.sin_family = AF_INET;
236 sin.sin_port = usin->sin6_port;
237 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 icsk->icsk_af_ops = &ipv6_mapped;
240 sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
245 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
// Restore the saved ext header length and the native IPv6 ops tables.
// NOTE(review): presumably only when tcp_v4_connect() failed; the guarding
// condition is not visible in this listing.
248 icsk->icsk_ext_hdr_len = exthdrlen;
249 icsk->icsk_af_ops = &ipv6_specific;
250 sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 tp->af_specific = &tcp_sock_ipv6_specific;
256 np->saddr = sk->sk_v6_rcv_saddr;
// Native IPv6 path: use the bound source address when there is one.
261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 saddr = &sk->sk_v6_rcv_saddr;
// Fill in the flow key used for the route lookup.
264 fl6.flowi6_proto = IPPROTO_TCP;
265 fl6.daddr = sk->sk_v6_daddr;
266 fl6.saddr = saddr ? *saddr : np->saddr;
267 fl6.flowi6_oif = sk->sk_bound_dev_if;
268 fl6.flowi6_mark = sk->sk_mark;
269 fl6.fl6_dport = usin->sin6_port;
270 fl6.fl6_sport = inet->inet_sport;
271 fl6.flowi6_uid = sk->sk_uid;
// np->opt is read under the socket lock; fl6_update_dst() may rewrite the
// flow destination and returns the final destination pointer for the lookup.
273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 final_p = fl6_update_dst(&fl6, opt, &final);
276 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
278 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
286 sk->sk_v6_rcv_saddr = *saddr;
289 /* set the source address */
291 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
293 sk->sk_gso_type = SKB_GSO_TCPV6;
294 ip6_dst_store(sk, dst, NULL, NULL);
// Extension header length: zero, or derived from the tx options.
// NOTE(review): the condition choosing between the two is not visible here.
296 icsk->icsk_ext_hdr_len = 0;
298 icsk->icsk_ext_hdr_len = opt->opt_flen +
// MSS clamp is the minimum IPv6 MTU less the TCP and IPv6 header sizes.
301 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
303 inet->inet_dport = usin->sin6_port;
// Enter SYN_SENT before hashing the socket into the connection tables.
305 tcp_set_state(sk, TCP_SYN_SENT);
306 err = inet6_hash_connect(tcp_death_row, sk);
// Outside repair mode, derive the write sequence and timestamp offset from
// the secure generators.
312 if (likely(!tp->repair)) {
314 WRITE_ONCE(tp->write_seq,
315 secure_tcpv6_seq(np->saddr.s6_addr32,
316 sk->sk_v6_daddr.s6_addr32,
319 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
321 sk->sk_v6_daddr.s6_addr32);
324 if (tcp_fastopen_defer_connect(sk, &err))
329 err = tcp_connect(sk);
// Cleanup statements: back to TCP_CLOSE, clear destination port and route
// capabilities. NOTE(review): presumably the failure path; its labels are
// not visible in this listing.
336 tcp_set_state(sk, TCP_CLOSE);
338 inet->inet_dport = 0;
339 sk->sk_route_caps = 0;
// Applies a reduced path MTU, read from tcp_sk(sk)->mtu_info, to the socket:
// updates the route PMTU and, when the cached pmtu cookie exceeds the new
// route MTU, resyncs the MSS and retransmits.
// NOTE(review): the statements guarded by the first two checks (presumably
// early returns) are missing from this listing.
343 static void tcp_v6_mtu_reduced(struct sock *sk)
345 struct dst_entry *dst;
// State check for listening or closed sockets.
348 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
// mtu_info is written with WRITE_ONCE() in tcp_v6_err().
351 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
353 /* Drop requests trying to increase our current mss.
354 * Check done in __ip6_rt_update_pmtu() is too late.
356 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
359 dst = inet6_csk_update_pmtu(sk, mtu);
363 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
364 tcp_sync_mss(sk, dst_mtu(dst));
365 tcp_simple_retransmit(sk);
// ICMPv6 error handler for TCP. Looks up the socket that the quoted TCP
// header belongs to and reacts to redirects, packet-too-big messages and
// other errors.
// @skb: skb->data is read as the quoted IPv6 header, with the quoted TCP
//       header at skb->data + offset
// @type, @code: ICMPv6 type and code, converted by icmpv6_err_convert()
// @info: for ICMPV6_PKT_TOOBIG, the MTU in network byte order
// NOTE(review): short lines (braces, gotos, labels, case labels, returns)
// are missing from this listing, so the branch structure is only partly
// visible.
369 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
370 u8 type, u8 code, int offset, __be32 info)
372 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
373 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
374 struct net *net = dev_net(skb->dev);
375 struct request_sock *fastopen;
376 struct ipv6_pinfo *np;
// Look up the socket using the addresses and ports quoted in the error.
383 sk = __inet6_lookup_established(net, &tcp_hashinfo,
384 &hdr->daddr, th->dest,
385 &hdr->saddr, ntohs(th->source),
386 skb->dev->ifindex, inet6_sdif(skb));
389 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
// Timewait socket: release the reference taken by the lookup.
394 if (sk->sk_state == TCP_TIME_WAIT) {
395 inet_twsk_put(inet_twsk(sk));
398 seq = ntohl(th->seq);
399 fatal = icmpv6_err_convert(type, code, &err);
// Request sockets are handed to tcp_req_err().
400 if (sk->sk_state == TCP_NEW_SYN_RECV) {
401 tcp_req_err(sk, seq, fatal);
// Socket is owned by a user context: count the ICMP as dropped, except for
// packet-too-big, which has a deferred path further down.
406 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
407 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
409 if (sk->sk_state == TCP_CLOSE)
// Hop limit of the packet below the configured minimum for the socket.
412 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
413 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
418 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
// For a fast open request the lower bound is the ISN sent for that request.
419 fastopen = rcu_dereference(tp->fastopen_rsk);
420 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
// Quoted sequence number outside snd_una..snd_nxt on a non-listening socket
// is counted as an out-of-window ICMP.
421 if (sk->sk_state != TCP_LISTEN &&
422 !between(seq, snd_una, tp->snd_nxt)) {
423 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
427 np = tcp_inet6_sk(sk);
// Redirect: when the socket is not owned by a user context, validate the
// cached dst against the cookie and pass the redirect to its ops.
429 if (type == NDISC_REDIRECT) {
430 if (!sock_owned_by_user(sk)) {
431 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
434 dst->ops->redirect(dst, sk, skb);
439 if (type == ICMPV6_PKT_TOOBIG) {
440 u32 mtu = ntohl(info);
442 /* We are not interested in TCP_LISTEN and open_requests
443 * (SYN-ACKs send out by Linux are always <576bytes so
444 * they should go through unfragmented).
446 if (sk->sk_state == TCP_LISTEN)
449 if (!ip6_sk_accept_pmtu(sk))
452 if (mtu < IPV6_MIN_MTU)
// Publish the MTU for tcp_v6_mtu_reduced(), then either apply it now or set
// the deferred bit when the socket is owned by a user context.
455 WRITE_ONCE(tp->mtu_info, mtu);
457 if (!sock_owned_by_user(sk))
458 tcp_v6_mtu_reduced(sk);
459 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
466 /* Might be for an request_sock */
467 switch (sk->sk_state) {
470 /* Only in fast or simultaneous open. If a fast open socket is
471 * is already accepted it is treated as a connected one below.
473 if (fastopen && !fastopen->sk)
476 if (!sock_owned_by_user(sk)) {
478 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
482 sk->sk_err_soft = err;
// Remaining states: report the error when the socket is not owned by a user
// context and recverr is enabled; sk_err_soft is also assigned below.
// NOTE(review): presumably as the else branch -- not visible here.
486 if (!sock_owned_by_user(sk) && np->recverr) {
488 sk->sk_error_report(sk);
490 sk->sk_err_soft = err;
// Builds a SYN-ACK for request req with tcp_make_synack() and transmits it
// with ip6_xmit(). A route is looked up first when the caller passes no dst.
// @sk: listening socket
// @dst: route to use, or NULL to look one up from the request
// @req: request sock the SYN-ACK answers
// @foc, @synack_type: passed through to tcp_make_synack()
// NOTE(review): one parameter line (the flow argument fl), some declarations
// and the error and return statements are missing from this listing.
499 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
501 struct request_sock *req,
502 struct tcp_fastopen_cookie *foc,
503 enum tcp_synack_type synack_type)
505 struct inet_request_sock *ireq = inet_rsk(req);
506 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
507 struct ipv6_txoptions *opt;
508 struct flowi6 *fl6 = &fl->u.ip6;
512 /* First, grab a route. */
513 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
514 IPPROTO_TCP)) == NULL)
517 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
// Checksum setup uses the local and remote addresses stored in the request.
520 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
521 &ireq->ir_v6_rmt_addr);
523 fl6->daddr = ireq->ir_v6_rmt_addr;
// With repflow set, reuse the flow label of the SYN kept in ireq->pktopts.
524 if (np->repflow && ireq->pktopts)
525 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
// Tx options come from the request or from the listener.
// NOTE(review): the condition choosing between them is not visible here.
528 opt = ireq->ipv6_opt;
530 opt = rcu_dereference(np->opt);
// The skb mark takes precedence over the socket mark when it is non-zero.
531 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
532 np->tclass, sk->sk_priority);
534 err = net_xmit_eval(err);
// Request sock destructor: frees the IPv6 options and the saved packet
// options skb held by the request.
542 static void tcp_v6_reqsk_destructor(struct request_sock *req)
544 kfree(inet_rsk(req)->ipv6_opt);
545 kfree_skb(inet_rsk(req)->pktopts);
548 #ifdef CONFIG_TCP_MD5SIG
// Looks up the MD5 key configured on sk for IPv6 peer address addr. Thin
// wrapper around tcp_md5_do_lookup() with family AF_INET6.
549 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
550 const struct in6_addr *addr)
552 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
// Looks up the MD5 key on sk for the peer of addr_sk, using the destination
// address stored in addr_sk (sk_v6_daddr).
555 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
556 const struct sock *addr_sk)
558 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
// Handles the MD5 key socket option: copies a struct tcp_md5sig from user
// space, validates it, and adds or deletes the key for the given peer.
// @optname: TCP_MD5SIG_EXT enables the prefix-length flag handling
// @optval, @optlen: user buffer holding struct tcp_md5sig
// A zero key length deletes the key; otherwise the key is added. V4-mapped
// peers are stored as AF_INET keys on the low 32 bits of the address.
// NOTE(review): the statements guarded by the validation checks (presumably
// error returns) and some continuation lines are missing from this listing.
561 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
562 char __user *optval, int optlen)
564 struct tcp_md5sig cmd;
565 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
568 if (optlen < sizeof(cmd))
571 if (copy_from_user(&cmd, optval, sizeof(cmd)))
574 if (sin6->sin6_family != AF_INET6)
// Extended option with the prefix flag: take the prefix length from the
// command and bound it. Otherwise the default below is used.
577 if (optname == TCP_MD5SIG_EXT &&
578 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
579 prefixlen = cmd.tcpm_prefixlen;
580 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
// Default prefix is the full address width: 32 for v4-mapped, else 128.
584 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
// Zero key length means delete.
587 if (!cmd.tcpm_keylen) {
588 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
589 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
591 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
592 AF_INET6, prefixlen);
595 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
598 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
599 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
600 AF_INET, prefixlen, cmd.tcpm_key,
601 cmd.tcpm_keylen, GFP_KERNEL);
603 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
604 AF_INET6, prefixlen, cmd.tcpm_key,
605 cmd.tcpm_keylen, GFP_KERNEL);
// Feeds the IPv6 pseudo-header followed by a copy of the TCP header into the
// MD5 hash request of pool hp.
// @nbytes: length stored in the pseudo-header len field
// Returns the result of crypto_ahash_update().
// NOTE(review): the lines that point bp at a buffer, fill in the pseudo-header
// addresses and adjust the copied TCP header are missing from this listing.
608 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
609 const struct in6_addr *daddr,
610 const struct in6_addr *saddr,
611 const struct tcphdr *th, int nbytes)
613 struct tcp6_pseudohdr *bp;
614 struct scatterlist sg;
618 /* 1. TCP pseudo-header (RFC2460) */
621 bp->protocol = cpu_to_be32(IPPROTO_TCP);
622 bp->len = cpu_to_be32(nbytes);
// The TCP header copy is placed directly after the pseudo-header so that both
// can be hashed as one contiguous region.
624 _th = (struct tcphdr *)(bp + 1);
625 memcpy(_th, th, sizeof(*th));
628 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
629 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
630 sizeof(*bp) + sizeof(*th));
631 return crypto_ahash_update(hp->md5_req);
// Computes the MD5 signature over the pseudo-header, the TCP header th and
// the key, and writes it to md5_hash. Only headers are hashed; the length
// passed to the header step is th->doff << 2.
// On the visible error path md5_hash is zeroed (16 bytes).
// NOTE(review): the gotos, labels, return statements and the assignment of
// req are missing from this listing.
634 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
635 const struct in6_addr *daddr, struct in6_addr *saddr,
636 const struct tcphdr *th)
638 struct tcp_md5sig_pool *hp;
639 struct ahash_request *req;
641 hp = tcp_get_md5sig_pool();
643 goto clear_hash_noput;
646 if (crypto_ahash_init(req))
648 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
650 if (tcp_md5_hash_key(hp, key))
// Final step writes the digest into md5_hash.
652 ahash_request_set_crypt(req, NULL, md5_hash, 0);
653 if (crypto_ahash_final(req))
// The pool is released on both the success path and the failure path.
656 tcp_put_md5sig_pool();
660 tcp_put_md5sig_pool();
662 memset(md5_hash, 0, 16);
// Computes the MD5 signature of a whole segment: pseudo-header and TCP
// header, then the skb data after the TCP header, then the key. The digest
// is written to md5_hash.
// @sk: when non-NULL, addresses are taken from the socket; otherwise from
//      the IPv6 header of skb
// On the visible error path md5_hash is zeroed (16 bytes).
// NOTE(review): the gotos, labels, return statements and the assignment of
// req are missing from this listing.
666 static int tcp_v6_md5_hash_skb(char *md5_hash,
667 const struct tcp_md5sig_key *key,
668 const struct sock *sk,
669 const struct sk_buff *skb)
671 const struct in6_addr *saddr, *daddr;
672 struct tcp_md5sig_pool *hp;
673 struct ahash_request *req;
674 const struct tcphdr *th = tcp_hdr(skb);
676 if (sk) { /* valid for establish/request sockets */
677 saddr = &sk->sk_v6_rcv_saddr;
678 daddr = &sk->sk_v6_daddr;
680 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
681 saddr = &ip6h->saddr;
682 daddr = &ip6h->daddr;
685 hp = tcp_get_md5sig_pool();
687 goto clear_hash_noput;
690 if (crypto_ahash_init(req))
// The pseudo-header length is the full skb length here, unlike the
// header-only variant tcp_v6_md5_hash_hdr().
693 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
// Hash the data that follows the TCP header (header length is doff << 2).
695 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
697 if (tcp_md5_hash_key(hp, key))
699 ahash_request_set_crypt(req, NULL, md5_hash, 0);
700 if (crypto_ahash_final(req))
// The pool is released on both the success path and the failure path.
703 tcp_put_md5sig_pool();
707 tcp_put_md5sig_pool();
709 memset(md5_hash, 0, 16);
// Checks the MD5 signature option of an incoming segment against the key
// configured on sk for the segment source address. Four cases are visible:
// no key and no option, key without option, option without key, and both
// present, in which case the hash is recomputed and compared.
// NOTE(review): the return statements and some declarations are missing
// from this listing, so the boolean returned in each case is not visible.
715 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
716 const struct sk_buff *skb)
718 #ifdef CONFIG_TCP_MD5SIG
719 const __u8 *hash_location = NULL;
720 struct tcp_md5sig_key *hash_expected;
721 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
722 const struct tcphdr *th = tcp_hdr(skb);
726 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
727 hash_location = tcp_parse_md5sig_option(th);
729 /* We've parsed the options - do we have a hash? */
730 if (!hash_expected && !hash_location)
// A key is configured but the segment carries no signature.
733 if (hash_expected && !hash_location) {
734 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
// The segment carries a signature but no key is configured.
738 if (!hash_expected && hash_location) {
739 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
743 /* check the signature */
744 genhash = tcp_v6_md5_hash_skb(newhash,
// Either hash generation failed or the 16-byte digest differs.
748 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
749 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
750 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
751 genhash ? "failed" : "mismatch",
752 &ip6h->saddr, ntohs(th->source),
753 &ip6h->daddr, ntohs(th->dest));
// Initialises the IPv6 part of a new request sock from the received SYN:
// remote and local addresses, the incoming interface for link-local peers,
// and optionally a reference to the SYN skb for later packet options.
// NOTE(review): one parameter line (the skb argument) and the statement that
// stores the skb after the refcount increment are missing from this listing.
760 static void tcp_v6_init_req(struct request_sock *req,
761 const struct sock *sk_listener,
764 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
765 struct inet_request_sock *ireq = inet_rsk(req);
766 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
// The peer is the packet source; the local address is the packet destination.
768 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
769 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
771 /* So that link locals have meaning */
772 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
773 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
774 ireq->ir_iif = tcp_v6_iif(skb);
// Keep the SYN only when it is not a timewait recycle (tcp_tw_isn unset) and
// the listener accepts its options or has any of these rx options enabled.
776 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
777 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
778 np->rxopt.bits.rxinfo ||
779 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
780 np->rxopt.bits.rxohlim || np->repflow)) {
781 refcount_inc(&skb->users);
// Route lookup callback for a request sock: delegates to
// inet6_csk_route_req() with the IPv6 member of the flow and IPPROTO_TCP.
// NOTE(review): the parameter line declaring fl is missing from this listing.
786 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
788 const struct request_sock *req)
790 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
// Request sock operations for TCP over IPv6. Passed to tcp_conn_request()
// and tcp_get_syncookie_mss() elsewhere in this file.
// NOTE(review): at least one initializer line is missing from this listing.
793 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
795 .obj_size = sizeof(struct tcp6_request_sock),
796 .rtx_syn_ack = tcp_rtx_synack,
797 .send_ack = tcp_v6_reqsk_send_ack,
798 .destructor = tcp_v6_reqsk_destructor,
799 .send_reset = tcp_v6_send_reset,
800 .syn_ack_timeout = tcp_syn_ack_timeout,
// IPv6-specific TCP request sock callbacks, used together with
// tcp6_request_sock_ops. The MD5 and syncookie members are only present when
// the corresponding config options are set.
803 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
// Same value tcp_v6_connect() uses for rx_opt.mss_clamp.
804 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
805 sizeof(struct ipv6hdr),
806 #ifdef CONFIG_TCP_MD5SIG
807 .req_md5_lookup = tcp_v6_md5_lookup,
808 .calc_md5_hash = tcp_v6_md5_hash_skb,
810 .init_req = tcp_v6_init_req,
811 #ifdef CONFIG_SYN_COOKIES
812 .cookie_init_seq = cookie_v6_init_sequence,
814 .route_req = tcp_v6_route_req,
815 .init_seq = tcp_v6_init_seq,
816 .init_ts_off = tcp_v6_init_ts_off,
817 .send_synack = tcp_v6_send_synack,
// Builds and sends a bare TCP segment (RST or ACK) in reply to skb, using the
// per-namespace control socket net->ipv6.tcp_sk for routing and transmit.
// @sk: socket the reply is for; may be NULL, in which case the namespace is
//      taken from the device of the dst attached to skb
// @seq, @ack, @win: values for the reply TCP header
// @tsval, @tsecr: timestamp option values
// @oif: output interface index, 0 for none
// @key: MD5 key used to sign the reply (CONFIG_TCP_MD5SIG only)
// @rst: non-zero when the reply is a reset
// @tclass, @label, @priority: traffic class, flow label and priority
// NOTE(review): short lines (braces, conditions guarding the option blocks,
// some declarations and assignments) are missing from this listing.
820 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
821 u32 ack, u32 win, u32 tsval, u32 tsecr,
822 int oif, struct tcp_md5sig_key *key, int rst,
823 u8 tclass, __be32 label, u32 priority)
825 const struct tcphdr *th = tcp_hdr(skb);
827 struct sk_buff *buff;
829 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
830 struct sock *ctl_sk = net->ipv6.tcp_sk;
831 unsigned int tot_len = sizeof(struct tcphdr);
832 struct dst_entry *dst;
// Grow the header length for each option that will be emitted.
837 tot_len += TCPOLEN_TSTAMP_ALIGNED;
838 #ifdef CONFIG_TCP_MD5SIG
840 tot_len += TCPOLEN_MD5SIG_ALIGNED;
843 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
// Reserve the whole buffer as headroom, then push the TCP header into it.
848 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
850 t1 = skb_push(buff, tot_len);
851 skb_reset_transport_header(buff);
853 /* Swap the send and the receive. */
854 memset(t1, 0, sizeof(*t1));
855 t1->dest = th->source;
856 t1->source = th->dest;
// doff is expressed in 32-bit words.
857 t1->doff = tot_len / 4;
858 t1->seq = htonl(seq);
859 t1->ack_seq = htonl(ack);
// ACK flag is set for every non-reset reply, and for a reset only when the
// incoming segment had no ACK.
860 t1->ack = !rst || !th->ack;
862 t1->window = htons(win);
// Options start directly after the fixed TCP header.
864 topt = (__be32 *)(t1 + 1);
867 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
868 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
869 *topt++ = htonl(tsval);
870 *topt++ = htonl(tsecr);
873 #ifdef CONFIG_TCP_MD5SIG
875 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
876 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
// Addresses are swapped relative to skb because the reply goes back to the
// sender: the incoming source is passed as the hash destination.
877 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
878 &ipv6_hdr(skb)->saddr,
879 &ipv6_hdr(skb)->daddr, t1);
// Flow for the reply, again with source and destination swapped.
883 memset(&fl6, 0, sizeof(fl6));
884 fl6.daddr = ipv6_hdr(skb)->saddr;
885 fl6.saddr = ipv6_hdr(skb)->daddr;
886 fl6.flowlabel = label;
888 buff->ip_summed = CHECKSUM_PARTIAL;
891 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
893 fl6.flowi6_proto = IPPROTO_TCP;
// Destinations that need a strict interface match use the incoming interface
// when the caller gave no oif.
894 if (rt6_need_strict(&fl6.daddr) && !oif)
895 fl6.flowi6_oif = tcp_v6_iif(skb);
897 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
900 fl6.flowi6_oif = oif;
// Timewait sockets carry their own mark and tx hash.
904 if (sk->sk_state == TCP_TIME_WAIT) {
905 mark = inet_twsk(sk)->tw_mark;
906 /* autoflowlabel relies on buff->hash */
907 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
912 buff->tstamp = tcp_transmit_time(sk);
// A reply mark derived from the incoming skb wins over the socket mark.
914 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
915 fl6.fl6_dport = t1->dest;
916 fl6.fl6_sport = t1->source;
917 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
918 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
920 /* Pass a socket to ip6_dst_lookup either it is for RST
921 * Underlying function will use this to retrieve the network
924 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
926 skb_dst_set(buff, dst);
927 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
// Count the segment, and the reset as well.
// NOTE(review): presumably the reset counter is guarded by rst; the
// condition is not visible in this listing.
929 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
931 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
// Sends a RST in reply to skb via tcp_v6_send_response(). Works out the
// sequence and ack numbers from the incoming header, the MD5 key if any, and
// the output interface, flow label and priority from the socket.
// @sk: socket the segment was matched to; may be NULL or a non-full socket
// NOTE(review): short lines (braces, returns, gotos, labels, several
// conditions and declarations) are missing from this listing.
938 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
940 const struct tcphdr *th = tcp_hdr(skb);
941 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
942 u32 seq = 0, ack_seq = 0;
943 struct tcp_md5sig_key *key = NULL;
944 #ifdef CONFIG_TCP_MD5SIG
945 const __u8 *hash_location = NULL;
946 unsigned char newhash[16];
948 struct sock *sk1 = NULL;
958 /* If sk not NULL, it means we did a successful lookup and incoming
959 * route had to be correct. prequeue might have dropped our dst.
961 if (!sk && !ipv6_unicast_destination(skb))
964 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
965 #ifdef CONFIG_TCP_MD5SIG
967 hash_location = tcp_parse_md5sig_option(th);
// Full socket: take the key configured for the sender. Otherwise, when the
// segment is signed, search for a listener that may hold the key.
968 if (sk && sk_fullsock(sk)) {
969 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
970 } else if (hash_location) {
972 * active side is lost. Try to find listening socket through
973 * source port, and then find md5 key through listening socket.
974 * we are not loose security here:
975 * Incoming packet is checked with md5 hash with finding key,
976 * no RST generated if md5 hash doesn't match.
978 sk1 = inet6_lookup_listener(net,
979 &tcp_hashinfo, NULL, 0,
981 th->source, &ipv6h->daddr,
983 tcp_v6_iif_l3_slave(skb),
988 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
// Recompute the hash of the incoming segment with the found key and compare
// it with the received one.
992 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
993 if (genhash || memcmp(hash_location, newhash, 16) != 0)
// Sequence number comes from the incoming ack; the ack number covers the
// incoming sequence space.
// NOTE(review): the condition choosing between them is not visible here.
999 seq = ntohl(th->ack_seq);
1001 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1005 oif = sk->sk_bound_dev_if;
1006 if (sk_fullsock(sk)) {
1007 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1009 trace_tcp_send_reset(sk, skb);
1011 label = ip6_flowlabel(ipv6h);
1012 priority = sk->sk_priority;
// Timewait sockets keep their own flow label and priority.
1014 if (sk->sk_state == TCP_TIME_WAIT) {
1015 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1016 priority = inet_twsk(sk)->tw_priority;
// Sysctl-controlled reflection of the incoming flow label on resets.
1019 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1020 label = ip6_flowlabel(ipv6h);
// rst is 1; window, timestamps and tclass are all 0.
1023 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
1026 #ifdef CONFIG_TCP_MD5SIG
// Sends a bare ACK in reply to skb. Thin wrapper around
// tcp_v6_send_response() with the rst argument set to 0; all other
// arguments are passed through unchanged.
1032 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1033 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1034 struct tcp_md5sig_key *key, u8 tclass,
1035 __be32 label, u32 priority)
1037 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1038 tclass, label, priority);
// Sends an ACK on behalf of a timewait socket, with all values taken from
// the timewait state: sequence numbers, scaled receive window, timestamps,
// bound device, MD5 key, traffic class, flow label and priority.
// NOTE(review): any statement after the call is missing from this listing.
1041 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1043 struct inet_timewait_sock *tw = inet_twsk(sk);
1044 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
// The advertised window is the stored window shifted right by the window
// scale; the timestamp value adds the stored offset to the raw clock.
1046 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1047 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1048 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1049 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1050 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
// Sends an ACK for a request sock. The sequence number depends on the state
// of sk: snt_isn + 1 for a listener, snd_nxt of sk otherwise. The MD5 key is
// looked up on sk for the source address of skb. Traffic class and flow
// label are passed as 0.
1055 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1056 struct request_sock *req)
1058 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1059 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1062 * The window field (SEG.WND) of every outgoing segment, with the
1063 * exception of <SYN> segments, MUST be right-shifted by
1064 * Rcv.Wind.Shift bits:
1066 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1067 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1068 tcp_rsk(req)->rcv_nxt,
1069 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1070 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1071 req->ts_recent, sk->sk_bound_dev_if,
1072 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1073 0, 0, sk->sk_priority);
// With CONFIG_SYN_COOKIES, passes the segment to cookie_v6_check() and
// replaces sk with its result.
// NOTE(review): the condition guarding the call, the end of the ifdef and
// the return statement are missing from this listing.
1077 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1079 #ifdef CONFIG_SYN_COOKIES
1080 const struct tcphdr *th = tcp_hdr(skb);
1083 sk = cookie_v6_check(sk, skb);
// Computes a syncookie for the given IPv6 and TCP headers. With
// CONFIG_SYN_COOKIES, obtains an MSS from tcp_get_syncookie_mss(), stores the
// cookie sequence number in *cookie and records a SYN queue overflow on sk.
// NOTE(review): the declaration of mss, the condition guarding the cookie
// generation and the return statement are missing from this listing.
1088 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1089 struct tcphdr *th, u32 *cookie)
1092 #ifdef CONFIG_SYN_COOKIES
1093 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1094 &tcp_request_sock_ipv6_ops, sk, th);
// mss is passed by address, so the callee may adjust it.
1096 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1097 tcp_synq_overflow(sk);
// Handles an incoming SYN on an IPv6 listener. IPv4 packets are passed to
// tcp_v4_conn_request(); IPv6 packets go to tcp_conn_request() with the IPv6
// ops tables after two sanity checks on the addresses.
// NOTE(review): the statements guarded by the two checks (presumably a jump
// to the drop path) and the drop label are missing from this listing.
1103 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1105 if (skb->protocol == htons(ETH_P_IP))
1106 return tcp_v4_conn_request(sk, skb);
1108 if (!ipv6_unicast_destination(skb))
// A v4-mapped source address in a native IPv6 packet is counted as a header
// error.
1111 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1112 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1116 return tcp_conn_request(&tcp6_request_sock_ops,
1117 &tcp_request_sock_ipv6_ops, sk, skb);
1121 return 0; /* don't send reset */
// Copies the saved inet6_skb_parm from the TCP control block back to the
// IP6CB location of skb. memmove() is used rather than memcpy().
// NOTE(review): presumably because the two areas overlap inside the skb
// control block -- confirm.
1124 static void tcp_v6_restore_cb(struct sk_buff *skb)
1126 /* We need to move header back to the beginning if xfrm6_policy_check()
1127 * and tcp_v6_fill_cb() are going to be called again.
1128 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1130 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1131 sizeof(struct inet6_skb_parm));
// Creates the child socket for a completed handshake on an IPv6 listener.
// Two paths are visible. For an IPv4 packet (v4-mapped connection) the child
// is created by tcp_v4_syn_recv_sock() and then given IPv6 state with the
// mapped ops tables. For a native IPv6 packet the child is created by
// tcp_create_openreq_child() and set up from the request and the listener.
// @sk: listening socket
// @skb: segment completing the handshake
// @req: request sock being promoted
// @dst: route for the child, looked up from the request on the IPv6 path
// @req_unhash: request to unhash when the child is inserted
// The own_req output is set from inet_ehash_nolisten().
// NOTE(review): short lines (braces, returns, gotos, labels, null checks,
// one parameter line and some declarations) are missing from this listing.
1134 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1135 struct request_sock *req,
1136 struct dst_entry *dst,
1137 struct request_sock *req_unhash,
1140 struct inet_request_sock *ireq;
1141 struct ipv6_pinfo *newnp;
1142 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1143 struct ipv6_txoptions *opt;
1144 struct inet_sock *newinet;
1145 bool found_dup_sk = false;
1146 struct tcp_sock *newtp;
1148 #ifdef CONFIG_TCP_MD5SIG
1149 struct tcp_md5sig_key *key;
// V4-mapped path: let the IPv4 code create the child.
1153 if (skb->protocol == htons(ETH_P_IP)) {
1158 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1159 req_unhash, own_req);
1164 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1166 newinet = inet_sk(newsk);
1167 newnp = tcp_inet6_sk(newsk);
1168 newtp = tcp_sk(newsk);
// Start from a copy of the listener IPv6 state, then fix up per-child fields.
1170 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1172 newnp->saddr = newsk->sk_v6_rcv_saddr;
1174 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1175 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1176 #ifdef CONFIG_TCP_MD5SIG
1177 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
// Pointers copied from the listener by the memcpy above are cleared in the
// child.
1180 newnp->ipv6_mc_list = NULL;
1181 newnp->ipv6_ac_list = NULL;
1182 newnp->ipv6_fl_list = NULL;
1183 newnp->pktoptions = NULL;
// Values taken from the IPv4 header of the segment.
1185 newnp->mcast_oif = inet_iif(skb);
1186 newnp->mcast_hops = ip_hdr(skb)->ttl;
1187 newnp->rcv_flowinfo = 0;
1189 newnp->flow_label = 0;
1192 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1193 * here, tcp_create_openreq_child now does this for us, see the comment in
1194 * that function for the gory details. -acme
1197 /* It is tricky place. Until this moment IPv4 tcp
1198 worked with IPv6 icsk.icsk_af_ops.
1201 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
// Native IPv6 path starts here.
1206 ireq = inet_rsk(req);
1208 if (sk_acceptq_is_full(sk))
1212 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1217 newsk = tcp_create_openreq_child(sk, req, skb);
1222 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1223 * count here, tcp_create_openreq_child now does this for us, see the
1224 * comment in that function for the gory details. -acme
1227 newsk->sk_gso_type = SKB_GSO_TCPV6;
1228 ip6_dst_store(newsk, dst, NULL, NULL);
1229 inet6_sk_rx_dst_set(newsk, skb);
1231 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1233 newtp = tcp_sk(newsk);
1234 newinet = inet_sk(newsk);
1235 newnp = tcp_inet6_sk(newsk);
// As on the mapped path: copy the listener state, then override.
1237 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
// Addresses and bound interface come from the request sock.
1239 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1240 newnp->saddr = ireq->ir_v6_loc_addr;
1241 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1242 newsk->sk_bound_dev_if = ireq->ir_iif;
1244 /* Now IPv6 options...
1246 First: no IPv4 options.
1248 newinet->inet_opt = NULL;
1249 newnp->ipv6_mc_list = NULL;
1250 newnp->ipv6_ac_list = NULL;
1251 newnp->ipv6_fl_list = NULL;
1254 newnp->rxopt.all = np->rxopt.all;
1256 newnp->pktoptions = NULL;
// Values taken from the IPv6 header of the segment.
1258 newnp->mcast_oif = tcp_v6_iif(skb);
1259 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1260 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1262 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1264 /* Clone native IPv6 options from listening socket (if any)
1266 Yes, keeping reference count would be much more clever,
1267 but we make one more one thing there: reattach optmem
// Options come from the request or from the listener and are duplicated for
// the child. NOTE(review): the conditions between these lines are not
// visible in this listing.
1270 opt = ireq->ipv6_opt;
1272 opt = rcu_dereference(np->opt);
1274 opt = ipv6_dup_options(newsk, opt);
1275 RCU_INIT_POINTER(newnp->opt, opt);
1277 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1279 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1282 tcp_ca_openreq_child(newsk, dst);
// MSS setup from the route MTU and advertised MSS metric.
1284 tcp_sync_mss(newsk, dst_mtu(dst));
1285 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1287 tcp_initialize_rcv_mss(newsk);
// The IPv4 address fields of a native IPv6 child get the LOOPBACK4_IPV6
// placeholder value.
1289 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1290 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1292 #ifdef CONFIG_TCP_MD5SIG
1293 /* Copy over the MD5 key from the original socket */
1294 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1296 /* We're using one, so create a matching key
1297 * on the newsk structure. If we fail to get
1298 * memory, then we end up not copying the key
1301 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1302 AF_INET6, 128, key->key, key->keylen,
1303 sk_gfp_mask(sk, GFP_ATOMIC));
// Inherit the local port binding; on failure the child is prepared for a
// forced close.
1307 if (__inet_inherit_port(sk, newsk) < 0) {
1308 inet_csk_prepare_forced_close(newsk);
1312 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1315 tcp_move_syn(newtp, req);
1317 /* Clone pktoptions received with SYN, if we own the req */
// The clone goes to the child; the reference held by the request is dropped
// and its pointer cleared either way.
1318 if (ireq->pktopts) {
1319 newnp->pktoptions = skb_clone(ireq->pktopts,
1320 sk_gfp_mask(sk, GFP_ATOMIC));
1321 consume_skb(ireq->pktopts);
1322 ireq->pktopts = NULL;
1323 if (newnp->pktoptions) {
1324 tcp_v6_restore_cb(newnp->pktoptions);
1325 skb_set_owner_r(newnp->pktoptions, newsk);
1329 if (!req_unhash && found_dup_sk) {
1330 /* This code path should only be executed in the
1331 * syncookie case only
1333 bh_unlock_sock(newsk);
// Overflow accounting. NOTE(review): presumably reached through a label on
// the accept-queue-full path; the label is not visible in this listing.
1342 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1350 /* The socket must have its spinlock held when we get
1351 * here, unless it is a TCP_LISTEN socket.
1353 * We have a potential double-lock case here, so even when
1354 * doing backlog processing we use the BH locking scheme.
1355 * This is because we cannot sleep with the original spinlock
/*
 * tcp_v6_do_rcv() - run one received segment through the TCP state
 * machine of socket @sk.  Called from tcp_v6_rcv() and installed as
 * .backlog_rcv in tcpv6_prot.
 *
 * Summary of the visible statements:
 *  - An skb whose protocol is ETH_P_IP is handed to tcp_v4_do_rcv() and
 *    that result is returned.
 *  - opt_skb, when set, is a clone of @skb kept for IPV6_PKTOPTIONS.
 *  - TCP_ESTABLISHED: save the rx hash and NAPI id, clear sk->sk_rx_dst
 *    when the input interface differs from rx_dst_ifindex or the dst
 *    check fails, call tcp_rcv_established(), then go to ipv6_pktoptions.
 *  - TCP_LISTEN: tcp_v6_cookie_check() yields nsk, which is passed to
 *    tcp_child_process() (the guarding conditions are not shown here).
 *  - Other states: tcp_rcv_state_process(), then ipv6_pktoptions.
 *  - Error handling visible here: tcp_v6_send_reset(), freeing opt_skb,
 *    and bumping TCP_MIB_CSUMERRORS / TCP_MIB_INERRS.
 *  - ipv6_pktoptions: when opt_skb ends exactly at tp->rcv_nxt and the
 *    socket is neither CLOSE nor LISTEN, mcast_oif, mcast_hops and
 *    rcv_flowinfo are latched from it according to the np->rxopt bits,
 *    and flow_label is latched as well (its guard is not shown).  If
 *    ipv6_opt_accepted() it is charged to @sk and exchanged into
 *    np->pktoptions; the other visible path frees it and exchanges NULL
 *    into np->pktoptions.
 *
 * NOTE(review): braces, labels and several short statements of this
 * function are absent from this listing; confirm the exact control flow
 * against the complete source.
 */
1358 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1360 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1361 struct sk_buff *opt_skb = NULL;
1362 struct tcp_sock *tp;
1364 /* Imagine: socket is IPv6. IPv4 packet arrives,
1365 goes to IPv4 receive handler and backlogged.
1366 From backlog it always goes here. Kerboom...
1367 Fortunately, tcp_rcv_established and rcv_established
1368 handle them correctly, but it is not case with
1369 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1372 if (skb->protocol == htons(ETH_P_IP))
1373 return tcp_v4_do_rcv(sk, skb);
1376 * socket locking is here for SMP purposes as backlog rcv
1377 * is currently called with bh processing disabled.
1380 /* Do Stevens' IPV6_PKTOPTIONS.
1382 Yes, guys, it is the only place in our code, where we
1383 may make it not affecting IPv4.
1384 The rest of code is protocol independent,
1385 and I do not like idea to uglify IPv4.
1387 Actually, all the idea behind IPV6_PKTOPTIONS
1388 looks not very well thought. For now we latch
1389 options, received in the last packet, enqueued
1390 by tcp. Feel free to propose better solution.
1394 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1396 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1397 struct dst_entry *dst = sk->sk_rx_dst;
1399 sock_rps_save_rxhash(sk, skb);
1400 sk_mark_napi_id(sk, skb);
1402 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1403 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1405 sk->sk_rx_dst = NULL;
1409 tcp_rcv_established(sk, skb);
1411 goto ipv6_pktoptions;
1415 if (tcp_checksum_complete(skb))
1418 if (sk->sk_state == TCP_LISTEN) {
1419 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1425 if (tcp_child_process(sk, nsk, skb))
1428 __kfree_skb(opt_skb);
1432 sock_rps_save_rxhash(sk, skb);
1434 if (tcp_rcv_state_process(sk, skb))
1437 goto ipv6_pktoptions;
1441 tcp_v6_send_reset(sk, skb);
1444 __kfree_skb(opt_skb);
1448 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1449 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1454 /* Do you ask, what is it?
1456 1. skb was enqueued by tcp.
1457 2. skb is added to tail of read queue, rather than out of order.
1458 3. socket is not in passive state.
1459 4. Finally, it really contains options, which user wants to receive.
1462 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1463 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1464 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1465 np->mcast_oif = tcp_v6_iif(opt_skb);
1466 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1467 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1468 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1469 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1471 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1472 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1473 skb_set_owner_r(opt_skb, sk);
1474 tcp_v6_restore_cb(opt_skb);
1475 opt_skb = xchg(&np->pktoptions, opt_skb);
1477 __kfree_skb(opt_skb);
1478 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_fill_cb() - populate TCP_SKB_CB(@skb) from the IPv6 header @hdr
 * and the TCP header @th.
 *
 * The IP6CB() contents are first moved (memmove) into
 * TCP_SKB_CB(skb)->header.h6.  Then the TCP fields are filled in:
 *  - seq and ack_seq are converted to host order;
 *  - end_seq = seq + SYN flag + FIN flag + (skb->len - th->doff * 4);
 *  - tcp_flags comes from tcp_flag_byte(), ip_dsfield from
 *    ipv6_get_dsfield();
 *  - tcp_tw_isn and sacked are zeroed;
 *  - has_rxtstamp is set when either skb->tstamp or the hardware
 *    timestamp is non-zero.
 *
 * Callers in this file restore the original layout with
 * tcp_v6_restore_cb() when the skb has to be looked at as IP6CB again.
 */
1486 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1487 const struct tcphdr *th)
1489 /* This is tricky: we move IP6CB at its correct location into
1490 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1491 * _decode_session6() uses IP6CB().
1492 * barrier() makes sure compiler won't play aliasing games.
1494 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1495 sizeof(struct inet6_skb_parm));
1498 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1499 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1500 skb->len - th->doff*4);
1501 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1502 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1503 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1504 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1505 TCP_SKB_CB(skb)->sacked = 0;
1506 TCP_SKB_CB(skb)->has_rxtstamp =
1507 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/*
 * tcp_v6_rcv() - receive entry point for TCP segments carried over IPv6;
 * registered as .handler of tcpv6_protocol.
 *
 * Summary of the visible statements:
 *  - Validation: packet type is checked against PACKET_HOST,
 *    TCP_MIB_INSEGS is counted, the fixed TCP header and then the full
 *    th->doff * 4 bytes are pulled, th->doff is checked against the
 *    minimum header size, and the checksum is initialised with the IPv6
 *    pseudo header.
 *  - Lookup: __inet6_lookup_skb() on tcp_hashinfo using the ports, the
 *    input interface and sdif.
 *  - TCP_NEW_SYN_RECV: the request's listener becomes sk; MD5, checksum
 *    and listener-state checks run; if tcp_filter() passes, the control
 *    block is filled and tcp_check_req() is called.  A resulting child
 *    is fed through tcp_child_process(), and a reset is sent if that
 *    fails.
 *  - Full sockets: min_hopcount, xfrm policy, MD5 and tcp_filter()
 *    checks, then tcp_v6_fill_cb().  A TCP_LISTEN socket goes straight
 *    to tcp_v6_do_rcv().  Otherwise the socket is BH-locked,
 *    tcp_segs_in() is called, and the skb is either processed by
 *    tcp_v6_do_rcv() (socket not owned by user) or queued with
 *    tcp_add_backlog().
 *  - Return value: ret ? -1 : 0.
 *  - No socket: xfrm policy check, tcp_v6_fill_cb(), checksum
 *    verification with TCP_MIB_CSUMERRORS / TCP_MIB_INERRS accounting,
 *    and tcp_v6_send_reset(NULL, skb).
 *  - TCP_TIME_WAIT: policy and checksum checks, then a switch on
 *    tcp_timewait_state_process().  Visible actions are a listener
 *    lookup via inet6_lookup_listener() with descheduling of the
 *    timewait socket, tcp_v6_timewait_ack(), and tcp_v6_send_reset()
 *    followed by inet_twsk_deschedule_put().
 *
 * NOTE(review): labels, braces and several short statements (including
 * some local declarations) are absent from this listing; verify branch
 * targets against the complete source.
 */
1510 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1512 struct sk_buff *skb_to_free;
1513 int sdif = inet6_sdif(skb);
1514 const struct tcphdr *th;
1515 const struct ipv6hdr *hdr;
1519 struct net *net = dev_net(skb->dev);
1521 if (skb->pkt_type != PACKET_HOST)
1525 * Count it even if it's bad.
1527 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1529 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1532 th = (const struct tcphdr *)skb->data;
1534 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1536 if (!pskb_may_pull(skb, th->doff*4))
1539 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1542 th = (const struct tcphdr *)skb->data;
1543 hdr = ipv6_hdr(skb);
1546 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1547 th->source, th->dest, inet6_iif(skb), sdif,
1553 if (sk->sk_state == TCP_TIME_WAIT)
1556 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1557 struct request_sock *req = inet_reqsk(sk);
1558 bool req_stolen = false;
1561 sk = req->rsk_listener;
1562 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1563 sk_drops_add(sk, skb);
1567 if (tcp_checksum_complete(skb)) {
1571 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1572 inet_csk_reqsk_queue_drop_and_put(sk, req);
1578 if (!tcp_filter(sk, skb)) {
1579 th = (const struct tcphdr *)skb->data;
1580 hdr = ipv6_hdr(skb);
1581 tcp_v6_fill_cb(skb, hdr, th);
1582 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1587 /* Another cpu got exclusive access to req
1588 * and created a full blown socket.
1589 * Try to feed this packet to this socket
1590 * instead of discarding it.
1592 tcp_v6_restore_cb(skb);
1596 goto discard_and_relse;
1600 tcp_v6_restore_cb(skb);
1601 } else if (tcp_child_process(sk, nsk, skb)) {
1602 tcp_v6_send_reset(nsk, skb);
1603 goto discard_and_relse;
1609 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1610 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1611 goto discard_and_relse;
1614 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1615 goto discard_and_relse;
1617 if (tcp_v6_inbound_md5_hash(sk, skb))
1618 goto discard_and_relse;
1620 if (tcp_filter(sk, skb))
1621 goto discard_and_relse;
1622 th = (const struct tcphdr *)skb->data;
1623 hdr = ipv6_hdr(skb);
1624 tcp_v6_fill_cb(skb, hdr, th);
1628 if (sk->sk_state == TCP_LISTEN) {
1629 ret = tcp_v6_do_rcv(sk, skb);
1630 goto put_and_return;
1633 sk_incoming_cpu_update(sk);
1635 bh_lock_sock_nested(sk);
1636 tcp_segs_in(tcp_sk(sk), skb);
1638 if (!sock_owned_by_user(sk)) {
1639 skb_to_free = sk->sk_rx_skb_cache;
1640 sk->sk_rx_skb_cache = NULL;
1641 ret = tcp_v6_do_rcv(sk, skb);
1643 if (tcp_add_backlog(sk, skb))
1644 goto discard_and_relse;
1649 __kfree_skb(skb_to_free);
1653 return ret ? -1 : 0;
1656 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1659 tcp_v6_fill_cb(skb, hdr, th);
1661 if (tcp_checksum_complete(skb)) {
1663 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1665 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1667 tcp_v6_send_reset(NULL, skb);
1675 sk_drops_add(sk, skb);
1681 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1682 inet_twsk_put(inet_twsk(sk));
1686 tcp_v6_fill_cb(skb, hdr, th);
1688 if (tcp_checksum_complete(skb)) {
1689 inet_twsk_put(inet_twsk(sk));
1693 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1698 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1699 skb, __tcp_hdrlen(th),
1700 &ipv6_hdr(skb)->saddr, th->source,
1701 &ipv6_hdr(skb)->daddr,
1703 tcp_v6_iif_l3_slave(skb),
1706 struct inet_timewait_sock *tw = inet_twsk(sk);
1707 inet_twsk_deschedule_put(tw);
1709 tcp_v6_restore_cb(skb);
1717 tcp_v6_timewait_ack(sk, skb);
1720 tcp_v6_send_reset(sk, skb);
1721 inet_twsk_deschedule_put(inet_twsk(sk));
1723 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() - early socket lookup for an incoming TCPv6 skb;
 * registered as .early_demux / .early_demux_handler of tcpv6_protocol.
 *
 * Visible steps: check the packet type against PACKET_HOST, make sure a
 * full struct tcphdr is available at the transport offset, check
 * th->doff against the minimum header size, then look the flow up among
 * established sockets with __inet6_lookup_established() (using
 * inet6_iif() / inet6_sdif()).  On a hit, skb->destructor is set to
 * sock_edemux.  For a full socket, the cached sk->sk_rx_dst is validated
 * with dst_check() against rx_dst_cookie and, when rx_dst_ifindex equals
 * skb->skb_iif, attached to the skb without taking a reference
 * (skb_dst_set_noref()).
 *
 * NOTE(review): the assignments of th and skb->sk and some guards are
 * not present in this listing.
 */
1729 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1731 const struct ipv6hdr *hdr;
1732 const struct tcphdr *th;
1735 if (skb->pkt_type != PACKET_HOST)
1738 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1741 hdr = ipv6_hdr(skb);
1744 if (th->doff < sizeof(struct tcphdr) / 4)
1747 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1748 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1749 &hdr->saddr, th->source,
1750 &hdr->daddr, ntohs(th->dest),
1751 inet6_iif(skb), inet6_sdif(skb));
1754 skb->destructor = sock_edemux;
1755 if (sk_fullsock(sk)) {
1756 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1759 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1761 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1762 skb_dst_set_noref(skb, dst);
/*
 * Time-wait socket operations for TCPv6, referenced by
 * tcpv6_prot.twsk_prot.  The object size is that of
 * struct tcp6_timewait_sock; uniqueness checking and destruction use
 * tcp_twsk_unique() and tcp_twsk_destructor().
 */
1767 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1768 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1769 .twsk_unique = tcp_twsk_unique,
1770 .twsk_destructor = tcp_twsk_destructor,
/*
 * Connection-socket address-family operations that tcp_v6_init_sock()
 * installs in icsk->icsk_af_ops.  Transmit, checksum, header rebuild,
 * rx-dst caching and MTU reduction all use the IPv6 helpers; header
 * lengths are those of struct ipv6hdr and struct frag_hdr, and socket
 * addresses are struct sockaddr_in6.  The compat_* socket option
 * handlers are only present under CONFIG_COMPAT.
 */
1773 static const struct inet_connection_sock_af_ops ipv6_specific = {
1774 .queue_xmit = inet6_csk_xmit,
1775 .send_check = tcp_v6_send_check,
1776 .rebuild_header = inet6_sk_rebuild_header,
1777 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1778 .conn_request = tcp_v6_conn_request,
1779 .syn_recv_sock = tcp_v6_syn_recv_sock,
1780 .net_header_len = sizeof(struct ipv6hdr),
1781 .net_frag_header_len = sizeof(struct frag_hdr),
1782 .setsockopt = ipv6_setsockopt,
1783 .getsockopt = ipv6_getsockopt,
1784 .addr2sockaddr = inet6_csk_addr2sockaddr,
1785 .sockaddr_len = sizeof(struct sockaddr_in6),
1786 #ifdef CONFIG_COMPAT
1787 .compat_setsockopt = compat_ipv6_setsockopt,
1788 .compat_getsockopt = compat_ipv6_getsockopt,
1790 .mtu_reduced = tcp_v6_mtu_reduced,
1793 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 hooks that tcp_v6_init_sock() installs in
 * tcp_sk(sk)->af_specific: key lookup, hash calculation over an skb and
 * key parsing all use the tcp_v6_* implementations.  Only built with
 * CONFIG_TCP_MD5SIG.
 */
1794 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1795 .md5_lookup = tcp_v6_md5_lookup,
1796 .calc_md5_hash = tcp_v6_md5_hash_skb,
1797 .md5_parse = tcp_v6_parse_md5_keys,
1802 * TCP over IPv4 via INET6 API
/*
 * Connection-socket address-family operations for TCP carried over IPv4
 * on an INET6 socket.  Transmit, checksum, header rebuild, rx-dst
 * caching and MTU reduction use the IPv4 helpers, and net_header_len is
 * sizeof(struct iphdr).  Connection requests, socket options and socket
 * addresses (struct sockaddr_in6) keep using the IPv6 helpers.  The
 * compat_* handlers are only present under CONFIG_COMPAT.
 */
1804 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1805 .queue_xmit = ip_queue_xmit,
1806 .send_check = tcp_v4_send_check,
1807 .rebuild_header = inet_sk_rebuild_header,
1808 .sk_rx_dst_set = inet_sk_rx_dst_set,
1809 .conn_request = tcp_v6_conn_request,
1810 .syn_recv_sock = tcp_v6_syn_recv_sock,
1811 .net_header_len = sizeof(struct iphdr),
1812 .setsockopt = ipv6_setsockopt,
1813 .getsockopt = ipv6_getsockopt,
1814 .addr2sockaddr = inet6_csk_addr2sockaddr,
1815 .sockaddr_len = sizeof(struct sockaddr_in6),
1816 #ifdef CONFIG_COMPAT
1817 .compat_setsockopt = compat_ipv6_setsockopt,
1818 .compat_getsockopt = compat_ipv6_getsockopt,
1820 .mtu_reduced = tcp_v4_mtu_reduced,
1823 #ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 hooks for the IPv4-over-INET6 case: key lookup and hash
 * calculation use the tcp_v4_* implementations, while key parsing still
 * goes through tcp_v6_parse_md5_keys().  Only built with
 * CONFIG_TCP_MD5SIG.
 */
1824 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1825 .md5_lookup = tcp_v4_md5_lookup,
1826 .calc_md5_hash = tcp_v4_md5_hash_skb,
1827 .md5_parse = tcp_v6_parse_md5_keys,
1831 /* NOTE: A lot of things set to zero explicitly by call to
1832 * sk_alloc() so need not be done here.
/*
 * tcp_v6_init_sock() - .init callback of tcpv6_prot.
 *
 * Visible effects: icsk->icsk_af_ops is pointed at ipv6_specific and,
 * under CONFIG_TCP_MD5SIG, tcp_sk(sk)->af_specific is pointed at
 * tcp_sock_ipv6_specific.
 *
 * NOTE(review): any other initialisation and the return statement are
 * not present in this listing.
 */
1834 static int tcp_v6_init_sock(struct sock *sk)
1836 struct inet_connection_sock *icsk = inet_csk(sk);
1840 icsk->icsk_af_ops = &ipv6_specific;
1842 #ifdef CONFIG_TCP_MD5SIG
1843 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/*
 * tcp_v6_destroy_sock() - .destroy callback of tcpv6_prot.  Runs the
 * shared TCP teardown (tcp_v4_destroy_sock()) first, then the
 * IPv6-specific teardown (inet6_destroy_sock()).
 */
1849 static void tcp_v6_destroy_sock(struct sock *sk)
1851 tcp_v4_destroy_sock(sk);
1852 inet6_destroy_sock(sk);
1855 #ifdef CONFIG_PROC_FS
1856 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * get_openreq6() - format one "tcp6" proc line for request sock @req.
 * Called from tcp6_seq_show() for sockets in TCP_NEW_SYN_RECV, with @i
 * being st->num.
 *
 * The local and remote IPv6 addresses are printed as four 32-bit hex
 * words each, followed by the port (ir_num, and ir_rmt_port converted
 * to host order).  ttd is the request timer's expiry relative to
 * jiffies and is reported through jiffies_to_clock_t().  The uid is
 * that of the listener (req->rsk_listener), mapped into the seq file's
 * user namespace.  The queue sizes, the "non standard timer" field and
 * the inode are constant zeros, and the timer-active field is the
 * constant 1.
 *
 * NOTE(review): the printing call itself and a few arguments are not
 * present in this listing.
 */
1857 static void get_openreq6(struct seq_file *seq,
1858 const struct request_sock *req, int i)
1860 long ttd = req->rsk_timer.expires - jiffies;
1861 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1862 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1868 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1869 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1871 src->s6_addr32[0], src->s6_addr32[1],
1872 src->s6_addr32[2], src->s6_addr32[3],
1873 inet_rsk(req)->ir_num,
1874 dest->s6_addr32[0], dest->s6_addr32[1],
1875 dest->s6_addr32[2], dest->s6_addr32[3],
1876 ntohs(inet_rsk(req)->ir_rmt_port),
1878 0, 0, /* could print option size, but that is af dependent. */
1879 1, /* timers active (only the expire timer) */
1880 jiffies_to_clock_t(ttd),
1882 from_kuid_munged(seq_user_ns(seq),
1883 sock_i_uid(req->rsk_listener)),
1884 0, /* non standard timer */
1885 0, /* open_requests have no inode */
/*
 * get_tcp6_sock() - format one "tcp6" proc line for socket @sp.  Called
 * from tcp6_seq_show() for sockets that are neither TIME_WAIT nor
 * NEW_SYN_RECV, with @i being st->num.
 *
 * Timer selection: for a pending retransmit, reordering or loss-probe
 * timer, and likewise for a pending zero-window probe, the expiry is
 * icsk->icsk_timeout.  Otherwise, if sp->sk_timer is pending, its
 * expiry is used; failing that, the current jiffies value is used.
 * The expiry is printed as a delta from jiffies converted with
 * jiffies_delta_to_clock_t().
 *
 * rx_queue is sk_ack_backlog for a listener; for other states it is
 * rcv_nxt - copied_seq clamped at zero, since the socket is read
 * without locking.  tx_queue is write_seq - snd_una.
 *
 * The final column is fastopenq->max_qlen for a listener, otherwise -1
 * while in initial slow start, else snd_ssthresh.
 *
 * NOTE(review): several declarations, the timer_active assignments and
 * the printing call are not present in this listing.
 */
1889 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1891 const struct in6_addr *dest, *src;
1894 unsigned long timer_expires;
1895 const struct inet_sock *inet = inet_sk(sp);
1896 const struct tcp_sock *tp = tcp_sk(sp);
1897 const struct inet_connection_sock *icsk = inet_csk(sp);
1898 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1902 dest = &sp->sk_v6_daddr;
1903 src = &sp->sk_v6_rcv_saddr;
1904 destp = ntohs(inet->inet_dport);
1905 srcp = ntohs(inet->inet_sport);
1907 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1908 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1909 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1911 timer_expires = icsk->icsk_timeout;
1912 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1914 timer_expires = icsk->icsk_timeout;
1915 } else if (timer_pending(&sp->sk_timer)) {
1917 timer_expires = sp->sk_timer.expires;
1920 timer_expires = jiffies;
1923 state = inet_sk_state_load(sp);
1924 if (state == TCP_LISTEN)
1925 rx_queue = sp->sk_ack_backlog;
1927 /* Because we don't lock the socket,
1928 * we might find a transient negative value.
1930 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1931 READ_ONCE(tp->copied_seq), 0);
1934 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1935 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1937 src->s6_addr32[0], src->s6_addr32[1],
1938 src->s6_addr32[2], src->s6_addr32[3], srcp,
1939 dest->s6_addr32[0], dest->s6_addr32[1],
1940 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1942 READ_ONCE(tp->write_seq) - tp->snd_una,
1945 jiffies_delta_to_clock_t(timer_expires - jiffies),
1946 icsk->icsk_retransmits,
1947 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1948 icsk->icsk_probes_out,
1950 refcount_read(&sp->sk_refcnt), sp,
1951 jiffies_to_clock_t(icsk->icsk_rto),
1952 jiffies_to_clock_t(icsk->icsk_ack.ato),
1953 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1955 state == TCP_LISTEN ?
1956 fastopenq->max_qlen :
1957 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/*
 * get_timewait6_sock() - format one "tcp6" proc line for time-wait
 * socket @tw.  Called from tcp6_seq_show() for TCP_TIME_WAIT entries,
 * with @i being st->num.
 *
 * Addresses come from tw_v6_rcv_saddr / tw_v6_daddr and ports from
 * tw_sport / tw_dport (converted to host order).  The state column is
 * tw->tw_substate, both queue sizes are zero, the timer type is the
 * constant 3 with the remaining time (tw_timer.expires - jiffies)
 * converted by jiffies_delta_to_clock_t(), and the line ends with the
 * reference count and the socket pointer.
 *
 * NOTE(review): the port variable declarations and the printing call
 * are not present in this listing.
 */
1961 static void get_timewait6_sock(struct seq_file *seq,
1962 struct inet_timewait_sock *tw, int i)
1964 long delta = tw->tw_timer.expires - jiffies;
1965 const struct in6_addr *dest, *src;
1968 dest = &tw->tw_v6_daddr;
1969 src = &tw->tw_v6_rcv_saddr;
1970 destp = ntohs(tw->tw_dport);
1971 srcp = ntohs(tw->tw_sport);
1974 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1975 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1977 src->s6_addr32[0], src->s6_addr32[1],
1978 src->s6_addr32[2], src->s6_addr32[3], srcp,
1979 dest->s6_addr32[0], dest->s6_addr32[1],
1980 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1981 tw->tw_substate, 0, 0,
1982 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1983 refcount_read(&tw->tw_refcnt), tw);
/*
 * tcp6_seq_show() - .show callback of tcp6_seq_ops.
 *
 * For SEQ_START_TOKEN the column header is emitted.  Otherwise @v is a
 * socket and the line formatter is chosen by sk->sk_state:
 * TCP_TIME_WAIT uses get_timewait6_sock(), TCP_NEW_SYN_RECV uses
 * get_openreq6(), and every other state uses get_tcp6_sock().  Each is
 * passed st->num as the entry index.
 *
 * NOTE(review): the assignment of st and the return statement are not
 * present in this listing.
 */
1986 static int tcp6_seq_show(struct seq_file *seq, void *v)
1988 struct tcp_iter_state *st;
1989 struct sock *sk = v;
1991 if (v == SEQ_START_TOKEN) {
1996 "st tx_queue rx_queue tr tm->when retrnsmt"
1997 " uid timeout inode\n");
2002 if (sk->sk_state == TCP_TIME_WAIT)
2003 get_timewait6_sock(seq, v, st->num);
2004 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2005 get_openreq6(seq, v, st->num);
2007 get_tcp6_sock(seq, v, st->num);
/*
 * seq_file operations for the "tcp6" proc entry created in
 * tcp6_proc_init().  Only .show is IPv6-specific; iteration uses the
 * shared tcp_seq_start(), tcp_seq_next() and tcp_seq_stop().
 */
2012 static const struct seq_operations tcp6_seq_ops = {
2013 .show = tcp6_seq_show,
2014 .start = tcp_seq_start,
2015 .next = tcp_seq_next,
2016 .stop = tcp_seq_stop,
/*
 * Per-file data handed to proc_create_net_data() by tcp6_proc_init().
 * NOTE(review): the initializer body is not present in this listing.
 */
2019 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/*
 * tcp6_proc_init() - create the read-only (0444) "tcp6" entry under
 * @net's proc_net directory, backed by tcp6_seq_ops with a
 * struct tcp_iter_state as per-open state and tcp6_seq_afinfo as data.
 *
 * NOTE(review): the return statements are not present in this listing;
 * the visible condition tests for creation failure.
 */
2023 int __net_init tcp6_proc_init(struct net *net)
2025 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2026 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/*
 * tcp6_proc_exit() - remove the "tcp6" entry that tcp6_proc_init()
 * created under @net's proc_net directory.
 */
2031 void tcp6_proc_exit(struct net *net)
2033 remove_proc_entry("tcp6", net->proc_net);
/*
 * Protocol descriptor for TCP over IPv6; referenced by
 * tcpv6_protosw.prot.
 *
 * Most callbacks are the shared TCP / inet ones (tcp_sendmsg,
 * tcp_recvmsg, inet_csk_accept, tcp_disconnect, ...), and the memory
 * accounting and sysctl fields point at the shared TCP counters and the
 * ipv4 sysctl offsets.  The IPv6-specific entries visible here are
 * .pre_connect/.connect, .init/.destroy, .backlog_rcv, .obj_size
 * (struct tcp6_sock), .twsk_prot and .rsk_prot.  Sockets are allocated
 * with SLAB_TYPESAFE_BY_RCU and hashed in tcp_hashinfo.  The compat_*
 * handlers are only present under CONFIG_COMPAT.
 */
2037 struct proto tcpv6_prot = {
2039 .owner = THIS_MODULE,
2041 .pre_connect = tcp_v6_pre_connect,
2042 .connect = tcp_v6_connect,
2043 .disconnect = tcp_disconnect,
2044 .accept = inet_csk_accept,
2046 .init = tcp_v6_init_sock,
2047 .destroy = tcp_v6_destroy_sock,
2048 .shutdown = tcp_shutdown,
2049 .setsockopt = tcp_setsockopt,
2050 .getsockopt = tcp_getsockopt,
2051 .keepalive = tcp_set_keepalive,
2052 .recvmsg = tcp_recvmsg,
2053 .sendmsg = tcp_sendmsg,
2054 .sendpage = tcp_sendpage,
2055 .backlog_rcv = tcp_v6_do_rcv,
2056 .release_cb = tcp_release_cb,
2058 .unhash = inet_unhash,
2059 .get_port = inet_csk_get_port,
2060 .enter_memory_pressure = tcp_enter_memory_pressure,
2061 .leave_memory_pressure = tcp_leave_memory_pressure,
2062 .stream_memory_free = tcp_stream_memory_free,
2063 .sockets_allocated = &tcp_sockets_allocated,
2064 .memory_allocated = &tcp_memory_allocated,
2065 .memory_pressure = &tcp_memory_pressure,
2066 .orphan_count = &tcp_orphan_count,
2067 .sysctl_mem = sysctl_tcp_mem,
2068 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2069 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2070 .max_header = MAX_TCP_HEADER,
2071 .obj_size = sizeof(struct tcp6_sock),
2072 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2073 .twsk_prot = &tcp6_timewait_sock_ops,
2074 .rsk_prot = &tcp6_request_sock_ops,
2075 .h.hashinfo = &tcp_hashinfo,
2076 .no_autobind = true,
2077 #ifdef CONFIG_COMPAT
2078 .compat_setsockopt = compat_tcp_setsockopt,
2079 .compat_getsockopt = compat_tcp_getsockopt,
2081 .diag_destroy = tcp_abort,
2084 /* thinking of making this const? Don't.
2085 * early_demux can change based on sysctl.
/*
 * IPv6 protocol handler entry for TCP; registered for IPPROTO_TCP by
 * tcpv6_init() and removed by tcpv6_exit().  Both .early_demux and
 * .early_demux_handler start out as tcp_v6_early_demux, received
 * packets go to tcp_v6_rcv() and ICMPv6 errors to tcp_v6_err().
 * Deliberately non-const.
 */
2087 static struct inet6_protocol tcpv6_protocol = {
2088 .early_demux = tcp_v6_early_demux,
2089 .early_demux_handler = tcp_v6_early_demux,
2090 .handler = tcp_v6_rcv,
2091 .err_handler = tcp_v6_err,
2092 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/*
 * Socket-layer switch entry binding SOCK_STREAM / IPPROTO_TCP on INET6
 * sockets to tcpv6_prot and inet6_stream_ops.  Registered by
 * tcpv6_init() and unregistered by tcpv6_exit().
 * NOTE(review): the continuation of the .flags expression is not
 * present in this listing.
 */
2095 static struct inet_protosw tcpv6_protosw = {
2096 .type = SOCK_STREAM,
2097 .protocol = IPPROTO_TCP,
2098 .prot = &tcpv6_prot,
2099 .ops = &inet6_stream_ops,
2100 .flags = INET_PROTOSW_PERMANENT |
/*
 * tcpv6_net_init() - per-namespace init: create the control socket
 * net->ipv6.tcp_sk (PF_INET6, SOCK_RAW, IPPROTO_TCP) and return the
 * result of inet_ctl_sock_create().
 */
2104 static int __net_init tcpv6_net_init(struct net *net)
2106 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2107 SOCK_RAW, IPPROTO_TCP, net);
/*
 * tcpv6_net_exit() - per-namespace exit: destroy the control socket
 * created by tcpv6_net_init().
 */
2110 static void __net_exit tcpv6_net_exit(struct net *net)
2112 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/*
 * tcpv6_net_exit_batch() - batched namespace exit: purge AF_INET6
 * time-wait sockets from tcp_hashinfo.  @net_exit_list is not used by
 * the visible statement.
 */
2115 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2117 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/*
 * Per-network-namespace hooks for TCPv6; registered by tcpv6_init()
 * and unregistered by tcpv6_exit().
 */
2120 static struct pernet_operations tcpv6_net_ops = {
2121 .init = tcpv6_net_init,
2122 .exit = tcpv6_net_exit,
2123 .exit_batch = tcpv6_net_exit_batch,
/*
 * tcpv6_init() - register TCPv6 with the IPv6 stack.
 *
 * Registration order: the protocol handler (inet6_add_protocol() for
 * IPPROTO_TCP), then the socket switch entry
 * (inet6_register_protosw()), then the per-namespace operations
 * (register_pernet_subsys()).  A failure of a later step jumps to a
 * label that undoes the earlier ones in reverse order
 * (out_tcpv6_protosw, then out_tcpv6_protocol).
 *
 * NOTE(review): the declaration of ret, the error tests, labels and
 * return statements are not present in this listing.
 */
2126 int __init tcpv6_init(void)
2130 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2134 /* register inet6 protocol */
2135 ret = inet6_register_protosw(&tcpv6_protosw);
2137 goto out_tcpv6_protocol;
2139 ret = register_pernet_subsys(&tcpv6_net_ops);
2141 goto out_tcpv6_protosw;
2146 inet6_unregister_protosw(&tcpv6_protosw);
2148 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/*
 * tcpv6_exit() - undo tcpv6_init() in reverse order: unregister the
 * per-namespace operations, then the socket switch entry, then the
 * IPPROTO_TCP protocol handler.
 */
2152 void tcpv6_exit(void)
2154 unregister_pernet_subsys(&tcpv6_net_ops);
2155 inet6_unregister_protosw(&tcpv6_protosw);
2156 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);