1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Linux INET6 implementation
7 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
/* Forward declarations for handlers and AF-ops tables defined later in
 * this file.  ipv6_specific is non-static (shared with other TCP/IPv6
 * code); the MD5 helpers exist only under CONFIG_TCP_MD5SIG.
 * NOTE(review): this listing elides lines (gaps in the inline numbering),
 * so some declarations are shown truncated.
 */
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
/* The ipv6_pinfo is laid out at the tail of struct tcp6_sock, so its
 * address can be computed from the socket pointer with a fixed offset. */
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
/* Cache the skb's validated route on @sk for the early-demux fast path.
 * Takes a dst reference via dst_hold_safe() before storing; also records
 * the ingress ifindex and the rt6 cookie used to revalidate the dst.
 */
103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
105 struct dst_entry *dst = skb_dst(skb);
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
111 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
112 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial TCP sequence number for this connection from the
 * packet's address/port 4-tuple via secure_tcpv6_seq().
 * NOTE(review): the dest-port argument line is elided in this listing.
 */
116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
121 tcp_hdr(skb)->source);
/* Per-connection timestamp offset, keyed on the address pair so peers
 * cannot correlate timestamps across connections.
 */
124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
/* Pre-connect hook: validate @addr_len before handing the sockaddr to the
 * BPF cgroup INET6_CONNECT program, so BPF cannot read out of bounds.
 */
130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent BPF program called below from accessing bytes that are out
135 * of the bound specified by user in addr_len.
137 if (addr_len < SIN6_LEN_RFC2133)
/* Caller must hold the socket lock. */
140 sock_owned_by_me(sk);
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active open: connect an IPv6 TCP socket to @uaddr.
 * Validates the sockaddr, resolves flow labels and link-local scope,
 * falls back to tcp_v4_connect() for v4-mapped destinations, performs the
 * route lookup, binds a local port via inet6_hash_connect(), and sends
 * the SYN through tcp_connect().
 * NOTE(review): this listing elides lines (gaps in the inline numbering);
 * error paths, some braces and a few statements are not visible here.
 */
145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 struct in6_addr *saddr = NULL, *final_p, final;
154 struct ipv6_txoptions *opt;
156 struct dst_entry *dst;
159 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
161 if (addr_len < SIN6_LEN_RFC2133)
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
167 memset(&fl6, 0, sizeof(fl6));
/* If the caller supplied a flow label, verify it was previously obtained
 * by this socket (fl6_sock_lookup). */
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
177 fl6_sock_release(flowlabel);
182 * connect() to INADDR_ANY means loopback (BSD'ism).
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
190 usin->sin6_addr = in6addr_loopback;
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
195 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a consistent interface scope. */
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
/* Destination changed: reset timestamp state and the write sequence. */
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
/* V4-mapped destination: retarget the socket at the IPv4 stack and let
 * tcp_v4_connect() do the work; on failure the elided path restores the
 * IPv6 ops. */
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
233 if (__ipv6_only_sock(sk))
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
240 icsk->icsk_af_ops = &ipv6_mapped;
242 mptcpv6_handle_mapped(sk, true);
243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
251 icsk->icsk_ext_hdr_len = exthdrlen;
252 icsk->icsk_af_ops = &ipv6_specific;
254 mptcpv6_handle_mapped(sk, false);
255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 tp->af_specific = &tcp_sock_ipv6_specific;
261 np->saddr = sk->sk_v6_rcv_saddr;
266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow description and look up a route. */
269 fl6.flowi6_proto = IPPROTO_TCP;
270 fl6.daddr = sk->sk_v6_daddr;
271 fl6.saddr = saddr ? *saddr : np->saddr;
272 fl6.flowi6_oif = sk->sk_bound_dev_if;
273 fl6.flowi6_mark = sk->sk_mark;
274 fl6.fl6_dport = usin->sin6_port;
275 fl6.fl6_sport = inet->inet_sport;
276 fl6.flowi6_uid = sk->sk_uid;
278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 final_p = fl6_update_dst(&fl6, opt, &final);
281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
291 sk->sk_v6_rcv_saddr = *saddr;
294 /* set the source address */
296 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
298 sk->sk_gso_type = SKB_GSO_TCPV6;
299 ip6_dst_store(sk, dst, NULL, NULL);
301 icsk->icsk_ext_hdr_len = 0;
303 icsk->icsk_ext_hdr_len = opt->opt_flen +
306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
308 inet->inet_dport = usin->sin6_port;
310 tcp_set_state(sk, TCP_SYN_SENT);
311 err = inet6_hash_connect(tcp_death_row, sk);
/* Unless repairing, pick secure ISN and timestamp offset now that the
 * local port is known. */
317 if (likely(!tp->repair)) {
319 WRITE_ONCE(tp->write_seq,
320 secure_tcpv6_seq(np->saddr.s6_addr32,
321 sk->sk_v6_daddr.s6_addr32,
324 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 sk->sk_v6_daddr.s6_addr32);
329 if (tcp_fastopen_defer_connect(sk, &err))
334 err = tcp_connect(sk);
/* Failure path: tear the socket back down to CLOSE. */
341 tcp_set_state(sk, TCP_CLOSE);
343 inet->inet_dport = 0;
344 sk->sk_route_caps = 0;
/* React to an ICMPv6 Packet Too Big: update the cached PMTU and, if our
 * current MSS no longer fits, resync the MSS and retransmit.  Ignored for
 * LISTEN/CLOSE sockets, and for bogus MTU increases.
 */
348 static void tcp_v6_mtu_reduced(struct sock *sk)
350 struct dst_entry *dst;
353 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
/* mtu_info is written lockless from tcp_v6_err(); pair with READ_ONCE. */
356 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358 /* Drop requests trying to increase our current mss.
359 * Check done in __ip6_rt_update_pmtu() is too late.
361 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
364 dst = inet6_csk_update_pmtu(sk, mtu);
368 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
369 tcp_sync_mss(sk, dst_mtu(dst));
370 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket for the embedded TCP
 * header, validate the error against the connection's sequence window,
 * and dispatch (redirect, PMTU update, or hard/soft error reporting).
 * NOTE(review): lines are elided in this listing (lock/unlock, some
 * labels and branches are not visible).
 */
374 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
375 u8 type, u8 code, int offset, __be32 info)
377 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
378 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
379 struct net *net = dev_net(skb->dev);
380 struct request_sock *fastopen;
381 struct ipv6_pinfo *np;
388 sk = __inet6_lookup_established(net, &tcp_hashinfo,
389 &hdr->daddr, th->dest,
390 &hdr->saddr, ntohs(th->source),
391 skb->dev->ifindex, inet6_sdif(skb));
394 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
399 if (sk->sk_state == TCP_TIME_WAIT) {
400 inet_twsk_put(inet_twsk(sk));
403 seq = ntohl(th->seq);
404 fatal = icmpv6_err_convert(type, code, &err);
405 if (sk->sk_state == TCP_NEW_SYN_RECV) {
406 tcp_req_err(sk, seq, fatal);
/* Owned by user: only count the drop, except PKT_TOOBIG which is
 * deferred (see below). */
411 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
412 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
414 if (sk->sk_state == TCP_CLOSE)
417 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
418 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
423 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
424 fastopen = rcu_dereference(tp->fastopen_rsk);
425 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
426 if (sk->sk_state != TCP_LISTEN &&
427 !between(seq, snd_una, tp->snd_nxt)) {
428 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
432 np = tcp_inet6_sk(sk);
434 if (type == NDISC_REDIRECT) {
435 if (!sock_owned_by_user(sk)) {
436 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
439 dst->ops->redirect(dst, sk, skb);
444 if (type == ICMPV6_PKT_TOOBIG) {
445 u32 mtu = ntohl(info);
447 /* We are not interested in TCP_LISTEN and open_requests
448 * (SYN-ACKs send out by Linux are always <576bytes so
449 * they should go through unfragmented).
451 if (sk->sk_state == TCP_LISTEN)
454 if (!ip6_sk_accept_pmtu(sk))
457 if (mtu < IPV6_MIN_MTU)
/* Publish the new MTU; handle now if unlocked, else defer via the
 * TCP_MTU_REDUCED_DEFERRED flag. */
460 WRITE_ONCE(tp->mtu_info, mtu);
462 if (!sock_owned_by_user(sk))
463 tcp_v6_mtu_reduced(sk);
464 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
471 /* Might be for an request_sock */
472 switch (sk->sk_state) {
475 /* Only in fast or simultaneous open. If a fast open socket is
476 * already accepted it is treated as a connected one below.
478 if (fastopen && !fastopen->sk)
481 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
483 if (!sock_owned_by_user(sk)) {
485 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
489 sk->sk_err_soft = err;
494 /* check if this ICMP message allows revert of backoff.
497 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
498 code == ICMPV6_NOROUTE)
499 tcp_ld_RTO_revert(sk, seq);
502 if (!sock_owned_by_user(sk) && np->recverr) {
504 sk->sk_error_report(sk);
506 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for @req.  Grabs a route if the caller did
 * not supply one, constructs the segment via tcp_make_synack(), fills in
 * checksum/flow label/tclass, and sends it with ip6_xmit().
 * Returns a net_xmit_eval()-style error code.
 */
515 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
517 struct request_sock *req,
518 struct tcp_fastopen_cookie *foc,
519 enum tcp_synack_type synack_type,
520 struct sk_buff *syn_skb)
522 struct inet_request_sock *ireq = inet_rsk(req);
523 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
524 struct ipv6_txoptions *opt;
525 struct flowi6 *fl6 = &fl->u.ip6;
530 /* First, grab a route. */
531 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
532 IPPROTO_TCP)) == NULL)
535 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
538 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
539 &ireq->ir_v6_rmt_addr);
541 fl6->daddr = ireq->ir_v6_rmt_addr;
542 if (np->repflow && ireq->pktopts)
543 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Optionally reflect the SYN's ToS (minus ECN bits) per the
 * tcp_reflect_tos sysctl; always honour BPF-requested ECT(0). */
545 tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
546 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
547 (np->tclass & INET_ECN_MASK) :
550 if (!INET_ECN_is_capable(tclass) &&
551 tcp_bpf_ca_needs_ecn((struct sock *)req))
552 tclass |= INET_ECN_ECT_0;
555 opt = ireq->ipv6_opt;
557 opt = rcu_dereference(np->opt);
558 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
559 tclass, sk->sk_priority);
561 err = net_xmit_eval(err);
/* Free the per-request IPv6 state: cloned tx options and the saved SYN
 * skb holding received packet options. */
569 static void tcp_v6_reqsk_destructor(struct request_sock *req)
571 kfree(inet_rsk(req)->ipv6_opt);
572 kfree_skb(inet_rsk(req)->pktopts);
575 #ifdef CONFIG_TCP_MD5SIG
/* Look up the MD5 key configured for peer @addr (AF_INET6) on @sk,
 * scoped by the given L3 master device index. */
576 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
577 const struct in6_addr *addr,
580 return tcp_md5_do_lookup(sk, l3index,
581 (union tcp_md5_addr *)addr, AF_INET6);
/* MD5 key lookup keyed by @addr_sk's destination address; the L3 index
 * is resolved from the address socket's bound device. */
584 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
585 const struct sock *addr_sk)
589 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
590 addr_sk->sk_bound_dev_if);
591 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the user's
 * struct tcp_md5sig, resolve optional prefix length and L3 master device
 * index, then add or (for a zero key length) delete the key.  V4-mapped
 * addresses are stored as AF_INET keys using the embedded IPv4 address.
 */
595 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
596 sockptr_t optval, int optlen)
598 struct tcp_md5sig cmd;
599 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
603 if (optlen < sizeof(cmd))
606 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
609 if (sin6->sin6_family != AF_INET6)
612 if (optname == TCP_MD5SIG_EXT &&
613 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
614 prefixlen = cmd.tcpm_prefixlen;
615 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
/* Default prefix: full host match (32 bits for v4-mapped, 128 for v6). */
619 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
622 if (optname == TCP_MD5SIG_EXT &&
623 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
624 struct net_device *dev;
627 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
628 if (dev && netif_is_l3_master(dev))
629 l3index = dev->ifindex;
632 /* ok to reference set/not set outside of rcu;
633 * right now device MUST be an L3 master
635 if (!dev || !l3index)
639 if (!cmd.tcpm_keylen) {
640 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
641 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
644 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
645 AF_INET6, prefixlen, l3index);
648 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
651 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
652 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
653 AF_INET, prefixlen, l3index,
654 cmd.tcpm_key, cmd.tcpm_keylen,
657 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
658 AF_INET6, prefixlen, l3index,
659 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the TCPv6 pseudo-header plus a checksum-zeroed copy of the TCP
 * header into the pool's ahash request.  Returns the crypto error code.
 * NOTE(review): the scratch-buffer setup lines are elided in this listing.
 */
662 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
663 const struct in6_addr *daddr,
664 const struct in6_addr *saddr,
665 const struct tcphdr *th, int nbytes)
667 struct tcp6_pseudohdr *bp;
668 struct scatterlist sg;
672 /* 1. TCP pseudo-header (RFC2460) */
675 bp->protocol = cpu_to_be32(IPPROTO_TCP);
676 bp->len = cpu_to_be32(nbytes);
678 _th = (struct tcphdr *)(bp + 1);
679 memcpy(_th, th, sizeof(*th));
682 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
683 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
684 sizeof(*bp) + sizeof(*th));
685 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over pseudo-header + TCP header + key and
 * store the 16-byte digest in @md5_hash.  On any crypto failure the
 * output is zeroed so callers never use a stale digest.
 */
688 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
689 const struct in6_addr *daddr, struct in6_addr *saddr,
690 const struct tcphdr *th)
692 struct tcp_md5sig_pool *hp;
693 struct ahash_request *req;
695 hp = tcp_get_md5sig_pool();
697 goto clear_hash_noput;
700 if (crypto_ahash_init(req))
702 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
704 if (tcp_md5_hash_key(hp, key))
706 ahash_request_set_crypt(req, NULL, md5_hash, 0);
707 if (crypto_ahash_final(req))
710 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest buffer. */
714 tcp_put_md5sig_pool();
716 memset(md5_hash, 0, 16);
/* Compute the MD5 signature for an entire segment (pseudo-header, TCP
 * header, payload, key).  Addresses come from @sk when available
 * (established/request sockets), otherwise from the skb's IPv6 header.
 * Zeroes @md5_hash on failure, like tcp_v6_md5_hash_hdr().
 */
720 static int tcp_v6_md5_hash_skb(char *md5_hash,
721 const struct tcp_md5sig_key *key,
722 const struct sock *sk,
723 const struct sk_buff *skb)
725 const struct in6_addr *saddr, *daddr;
726 struct tcp_md5sig_pool *hp;
727 struct ahash_request *req;
728 const struct tcphdr *th = tcp_hdr(skb);
730 if (sk) { /* valid for establish/request sockets */
731 saddr = &sk->sk_v6_rcv_saddr;
732 daddr = &sk->sk_v6_daddr;
734 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
735 saddr = &ip6h->saddr;
736 daddr = &ip6h->daddr;
739 hp = tcp_get_md5sig_pool();
741 goto clear_hash_noput;
744 if (crypto_ahash_init(req))
747 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
749 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
751 if (tcp_md5_hash_key(hp, key))
753 ahash_request_set_crypt(req, NULL, md5_hash, 0);
754 if (crypto_ahash_final(req))
757 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the digest buffer. */
761 tcp_put_md5sig_pool();
763 memset(md5_hash, 0, 16);
/* Validate the MD5 option on an inbound segment against the key expected
 * for its source address.  A mismatch in either direction (option present
 * without a key, key present without an option, or digest mismatch) bumps
 * the corresponding MIB counter; the elided returns drop the segment.
 */
769 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
770 const struct sk_buff *skb,
773 #ifdef CONFIG_TCP_MD5SIG
774 const __u8 *hash_location = NULL;
775 struct tcp_md5sig_key *hash_expected;
776 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
777 const struct tcphdr *th = tcp_hdr(skb);
778 int genhash, l3index;
781 /* sdif set, means packet ingressed via a device
782 * in an L3 domain and dif is set to the l3mdev
784 l3index = sdif ? dif : 0;
786 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
787 hash_location = tcp_parse_md5sig_option(th);
789 /* We've parsed the options - do we have a hash? */
790 if (!hash_expected && !hash_location)
793 if (hash_expected && !hash_location) {
794 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
798 if (!hash_expected && hash_location) {
799 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
803 /* check the signature */
804 genhash = tcp_v6_md5_hash_skb(newhash,
808 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
809 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
810 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
811 genhash ? "failed" : "mismatch",
812 &ip6h->saddr, ntohs(th->source),
813 &ip6h->daddr, ntohs(th->dest), l3index);
/* Initialise the IPv6 portion of a freshly allocated request sock from
 * the incoming SYN: remote/local addresses, ingress ifindex for
 * link-local peers, and (when any rx option is wanted) a reference to
 * the SYN skb so its packet options survive until accept().
 */
820 static void tcp_v6_init_req(struct request_sock *req,
821 const struct sock *sk_listener,
824 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
825 struct inet_request_sock *ireq = inet_rsk(req);
826 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
828 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
829 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
831 /* So that link locals have meaning */
832 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
833 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
834 ireq->ir_iif = tcp_v6_iif(skb);
836 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
837 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
838 np->rxopt.bits.rxinfo ||
839 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
840 np->rxopt.bits.rxohlim || np->repflow)) {
/* Keep the SYN skb alive; the elided line stores it in ireq->pktopts. */
841 refcount_inc(&skb->users);
/* route_req callback: initialise the request sock, run the LSM hook, and
 * return a route for the SYN-ACK (NULL from the elided branch on LSM
 * denial). */
846 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
849 struct request_sock *req)
851 tcp_v6_init_req(req, sk, skb);
853 if (security_inet_conn_request(sk, skb, req))
856 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* request_sock ops for TCPv6: sizing plus the SYN-ACK retransmit, ACK,
 * destructor, reset and timeout callbacks defined in this file. */
859 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
861 .obj_size = sizeof(struct tcp6_request_sock),
862 .rtx_syn_ack = tcp_rtx_synack,
863 .send_ack = tcp_v6_reqsk_send_ack,
864 .destructor = tcp_v6_reqsk_destructor,
865 .send_reset = tcp_v6_send_reset,
866 .syn_ack_timeout = tcp_syn_ack_timeout,
/* AF-specific request-sock ops: MSS clamp for the IPv6 minimum MTU, plus
 * MD5 and syncookie hooks when the corresponding configs are enabled. */
869 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
870 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
871 sizeof(struct ipv6hdr),
872 #ifdef CONFIG_TCP_MD5SIG
873 .req_md5_lookup = tcp_v6_md5_lookup,
874 .calc_md5_hash = tcp_v6_md5_hash_skb,
876 #ifdef CONFIG_SYN_COOKIES
877 .cookie_init_seq = cookie_v6_init_sequence,
879 .route_req = tcp_v6_route_req,
880 .init_seq = tcp_v6_init_seq,
881 .init_ts_off = tcp_v6_init_ts_off,
882 .send_synack = tcp_v6_send_synack,
/* Build and send a bare TCP control segment (RST when @rst, otherwise an
 * ACK) in reply to @skb, using the per-netns IPv6 control socket rather
 * than @sk (which may be NULL, a timewait sock, or a request sock).
 * Optional pieces sized into tot_len: timestamps, an MD5 signature, and
 * an MPTCP reset option.
 * NOTE(review): lines are elided in this listing (several conditionals
 * and the failure cleanup are not visible).
 */
885 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
886 u32 ack, u32 win, u32 tsval, u32 tsecr,
887 int oif, struct tcp_md5sig_key *key, int rst,
888 u8 tclass, __be32 label, u32 priority)
890 const struct tcphdr *th = tcp_hdr(skb);
892 struct sk_buff *buff;
894 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
895 struct sock *ctl_sk = net->ipv6.tcp_sk;
896 unsigned int tot_len = sizeof(struct tcphdr);
897 __be32 mrst = 0, *topt;
898 struct dst_entry *dst;
902 tot_len += TCPOLEN_TSTAMP_ALIGNED;
903 #ifdef CONFIG_TCP_MD5SIG
905 tot_len += TCPOLEN_MD5SIG_ALIGNED;
910 mrst = mptcp_reset_option(skb);
913 tot_len += sizeof(__be32);
917 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
922 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
924 t1 = skb_push(buff, tot_len);
925 skb_reset_transport_header(buff);
927 /* Swap the send and the receive. */
928 memset(t1, 0, sizeof(*t1));
929 t1->dest = th->source;
930 t1->source = th->dest;
931 t1->doff = tot_len / 4;
932 t1->seq = htonl(seq);
933 t1->ack_seq = htonl(ack);
/* A RST replying to a segment that carried no ACK must itself carry an
 * ACK; plain control segments always do. */
934 t1->ack = !rst || !th->ack;
936 t1->window = htons(win);
938 topt = (__be32 *)(t1 + 1);
941 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
942 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
943 *topt++ = htonl(tsval);
944 *topt++ = htonl(tsecr);
950 #ifdef CONFIG_TCP_MD5SIG
952 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
953 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
954 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
955 &ipv6_hdr(skb)->saddr,
956 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: swap the incoming addresses. */
960 memset(&fl6, 0, sizeof(fl6));
961 fl6.daddr = ipv6_hdr(skb)->saddr;
962 fl6.saddr = ipv6_hdr(skb)->daddr;
963 fl6.flowlabel = label;
965 buff->ip_summed = CHECKSUM_PARTIAL;
968 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
970 fl6.flowi6_proto = IPPROTO_TCP;
971 if (rt6_need_strict(&fl6.daddr) && !oif)
972 fl6.flowi6_oif = tcp_v6_iif(skb);
974 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
977 fl6.flowi6_oif = oif;
981 if (sk->sk_state == TCP_TIME_WAIT) {
982 mark = inet_twsk(sk)->tw_mark;
983 /* autoflowlabel relies on buff->hash */
984 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
989 buff->tstamp = tcp_transmit_time(sk);
991 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
992 fl6.fl6_dport = t1->dest;
993 fl6.fl6_sport = t1->source;
994 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
995 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
997 /* Pass a socket to ip6_dst_lookup either it is for RST
998 * Underlying function will use this to retrieve the network
1001 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
1003 skb_dst_set(buff, dst);
1004 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1005 tclass & ~INET_ECN_MASK, priority);
1006 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
1008 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb.  @sk may be NULL (no matching socket).
 * When the incoming segment carries an MD5 option but we have no socket,
 * a listener lookup by source port is attempted to find the key so the
 * RST can be signed; an unverifiable MD5 segment gets no RST at all.
 * NOTE(review): lines are elided in this listing (rcu lock/unlock and
 * some branches are not visible).
 */
1015 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
1017 const struct tcphdr *th = tcp_hdr(skb);
1018 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1019 u32 seq = 0, ack_seq = 0;
1020 struct tcp_md5sig_key *key = NULL;
1021 #ifdef CONFIG_TCP_MD5SIG
1022 const __u8 *hash_location = NULL;
1023 unsigned char newhash[16];
1025 struct sock *sk1 = NULL;
1035 /* If sk not NULL, it means we did a successful lookup and incoming
1036 * route had to be correct. prequeue might have dropped our dst.
1038 if (!sk && !ipv6_unicast_destination(skb))
1041 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
1042 #ifdef CONFIG_TCP_MD5SIG
1044 hash_location = tcp_parse_md5sig_option(th);
1045 if (sk && sk_fullsock(sk)) {
1048 /* sdif set, means packet ingressed via a device
1049 * in an L3 domain and inet_iif is set to it.
1051 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1052 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1053 } else if (hash_location) {
1054 int dif = tcp_v6_iif_l3_slave(skb);
1055 int sdif = tcp_v6_sdif(skb);
1059 * active side is lost. Try to find listening socket through
1060 * source port, and then find md5 key through listening socket.
1061 * we are not loose security here:
1062 * Incoming packet is checked with md5 hash with finding key,
1063 * no RST generated if md5 hash doesn't match.
1065 sk1 = inet6_lookup_listener(net,
1066 &tcp_hashinfo, NULL, 0,
1068 th->source, &ipv6h->daddr,
1069 ntohs(th->source), dif, sdif);
1073 /* sdif set, means packet ingressed via a device
1074 * in an L3 domain and dif is set to it.
1076 l3index = tcp_v6_sdif(skb) ? dif : 0;
1078 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1082 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1083 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RFC 793: if the incoming segment had an ACK, RST echoes its ack_seq;
 * otherwise RST acks the segment's sequence span. */
1089 seq = ntohl(th->ack_seq);
1091 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1095 oif = sk->sk_bound_dev_if;
1096 if (sk_fullsock(sk)) {
1097 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1099 trace_tcp_send_reset(sk, skb);
1101 label = ip6_flowlabel(ipv6h);
1102 priority = sk->sk_priority;
1104 if (sk->sk_state == TCP_TIME_WAIT) {
1105 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1106 priority = inet_twsk(sk)->tw_priority;
1109 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1110 label = ip6_flowlabel(ipv6h);
1113 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1114 ipv6_get_dsfield(ipv6h), label, priority);
1116 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper: send a pure ACK via tcp_v6_send_response() (rst = 0). */
1122 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
1123 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
1124 struct tcp_md5sig_key *key, u8 tclass,
1125 __be32 label, u32 priority)
1127 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1128 tclass, label, priority);
/* ACK on behalf of a TIME_WAIT socket, using the state preserved in the
 * timewait sock (snd_nxt/rcv_nxt, scaled window, timestamps, MD5 key,
 * tclass, flow label, priority). */
1131 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1133 struct inet_timewait_sock *tw = inet_twsk(sk);
1134 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1136 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1137 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1138 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1139 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1140 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
/* ACK on behalf of a request sock (SYN_RECV / Fast Open).  The sequence
 * number depends on whether @sk is still the listener; the advertised
 * window is right-shifted by the negotiated receive window scale.
 */
1145 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1146 struct request_sock *req)
1150 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1152 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1153 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1156 * The window field (SEG.WND) of every outgoing segment, with the
1157 * exception of <SYN> segments, MUST be right-shifted by
1158 * Rcv.Wind.Shift bits:
1160 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1161 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1162 tcp_rsk(req)->rcv_nxt,
1163 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1164 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1165 req->ts_recent, sk->sk_bound_dev_if,
1166 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1167 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
/* Validate a SYN cookie ACK via cookie_v6_check() when syncookies are
 * compiled in; the elided guard returns @sk unchanged otherwise. */
1171 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1173 #ifdef CONFIG_SYN_COOKIES
1174 const struct tcphdr *th = tcp_hdr(skb);
1177 sk = cookie_v6_check(sk, skb);
/* BPF helper support: compute an IPv6 SYN cookie and its MSS value for
 * the given headers, recording synq overflow.  Returns the MSS (0 on the
 * elided failure path when syncookies are disabled or MSS lookup fails).
 */
1182 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1183 struct tcphdr *th, u32 *cookie)
1186 #ifdef CONFIG_SYN_COOKIES
1187 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1188 &tcp_request_sock_ipv6_ops, sk, th);
1190 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1191 tcp_synq_overflow(sk);
/* Handle an incoming SYN: delegate v4 frames to tcp_v4_conn_request(),
 * reject non-unicast destinations and v4-mapped sources, then run the
 * generic tcp_conn_request() with the IPv6 ops tables.  Returns 0 when
 * the packet should be dropped without a reset.
 */
1197 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1199 if (skb->protocol == htons(ETH_P_IP))
1200 return tcp_v4_conn_request(sk, skb);
1202 if (!ipv6_unicast_destination(skb))
1205 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1206 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1210 return tcp_conn_request(&tcp6_request_sock_ops,
1211 &tcp_request_sock_ipv6_ops, sk, skb);
1215 return 0; /* don't send reset */
1218 static void tcp_v6_restore_cb(struct sk_buff *skb)
1220 /* We need to move header back to the beginning if xfrm6_policy_check()
1221 * and tcp_v6_fill_cb() are going to be called again.
1222 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1224 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1225 sizeof(struct inet6_skb_parm));
/* Create the child (ESTABLISHED) socket after the 3WHS completes.
 * The first branch handles v4-mapped connections by delegating to
 * tcp_v4_syn_recv_sock() and then retargeting the child at the mapped
 * IPv6 ops; the main path allocates the child via
 * tcp_create_openreq_child(), copies addresses/options from the request,
 * duplicates IPv6 tx options, copies any MD5 key, inherits the port and
 * hashes the child.  Returns the new socket or NULL (elided error paths:
 * accept-queue overflow, route failure, port inheritance failure).
 * NOTE(review): lines are elided in this listing; several returns,
 * labels and braces are not visible.
 */
1228 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1229 struct request_sock *req,
1230 struct dst_entry *dst,
1231 struct request_sock *req_unhash,
1234 struct inet_request_sock *ireq;
1235 struct ipv6_pinfo *newnp;
1236 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1237 struct ipv6_txoptions *opt;
1238 struct inet_sock *newinet;
1239 bool found_dup_sk = false;
1240 struct tcp_sock *newtp;
1242 #ifdef CONFIG_TCP_MD5SIG
1243 struct tcp_md5sig_key *key;
/* --- v4-mapped path: let IPv4 build the child, then fix up its ops. --- */
1248 if (skb->protocol == htons(ETH_P_IP)) {
1253 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1254 req_unhash, own_req);
1259 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1261 newinet = inet_sk(newsk);
1262 newnp = tcp_inet6_sk(newsk);
1263 newtp = tcp_sk(newsk);
1265 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1267 newnp->saddr = newsk->sk_v6_rcv_saddr;
1269 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1270 if (sk_is_mptcp(newsk))
1271 mptcpv6_handle_mapped(newsk, true);
1272 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1273 #ifdef CONFIG_TCP_MD5SIG
1274 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1277 newnp->ipv6_mc_list = NULL;
1278 newnp->ipv6_ac_list = NULL;
1279 newnp->ipv6_fl_list = NULL;
1280 newnp->pktoptions = NULL;
1282 newnp->mcast_oif = inet_iif(skb);
1283 newnp->mcast_hops = ip_hdr(skb)->ttl;
1284 newnp->rcv_flowinfo = 0;
1286 newnp->flow_label = 0;
1289 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1290 * here, tcp_create_openreq_child now does this for us, see the comment in
1291 * that function for the gory details. -acme
1294 /* It is tricky place. Until this moment IPv4 tcp
1295 worked with IPv6 icsk.icsk_af_ops.
1298 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 path --- */
1303 ireq = inet_rsk(req);
1305 if (sk_acceptq_is_full(sk))
1309 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1314 newsk = tcp_create_openreq_child(sk, req, skb);
1319 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1320 * count here, tcp_create_openreq_child now does this for us, see the
1321 * comment in that function for the gory details. -acme
1324 newsk->sk_gso_type = SKB_GSO_TCPV6;
1325 ip6_dst_store(newsk, dst, NULL, NULL);
1326 inet6_sk_rx_dst_set(newsk, skb);
1328 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1330 newtp = tcp_sk(newsk);
1331 newinet = inet_sk(newsk);
1332 newnp = tcp_inet6_sk(newsk);
1334 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1336 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1337 newnp->saddr = ireq->ir_v6_loc_addr;
1338 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1339 newsk->sk_bound_dev_if = ireq->ir_iif;
1341 /* Now IPv6 options...
1343 First: no IPv4 options.
1345 newinet->inet_opt = NULL;
1346 newnp->ipv6_mc_list = NULL;
1347 newnp->ipv6_ac_list = NULL;
1348 newnp->ipv6_fl_list = NULL;
1351 newnp->rxopt.all = np->rxopt.all;
1353 newnp->pktoptions = NULL;
1355 newnp->mcast_oif = tcp_v6_iif(skb);
1356 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1357 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1359 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1361 /* Set ToS of the new socket based upon the value of incoming SYN.
1362 * ECT bits are set later in tcp_init_transfer().
1364 if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
1365 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
1367 /* Clone native IPv6 options from listening socket (if any)
1369 Yes, keeping reference count would be much more clever,
1370 but we make one more one thing there: reattach optmem
1373 opt = ireq->ipv6_opt;
1375 opt = rcu_dereference(np->opt);
1377 opt = ipv6_dup_options(newsk, opt);
1378 RCU_INIT_POINTER(newnp->opt, opt);
1380 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1382 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1385 tcp_ca_openreq_child(newsk, dst);
1387 tcp_sync_mss(newsk, dst_mtu(dst));
1388 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1390 tcp_initialize_rcv_mss(newsk);
1392 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1393 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1395 #ifdef CONFIG_TCP_MD5SIG
1396 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
1398 /* Copy over the MD5 key from the original socket */
1399 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
1401 /* We're using one, so create a matching key
1402 * on the newsk structure. If we fail to get
1403 * memory, then we end up not copying the key
1406 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1407 AF_INET6, 128, l3index, key->key, key->keylen,
1408 sk_gfp_mask(sk, GFP_ATOMIC));
1412 if (__inet_inherit_port(sk, newsk) < 0) {
1413 inet_csk_prepare_forced_close(newsk);
1417 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1420 tcp_move_syn(newtp, req);
1422 /* Clone pktoptions received with SYN, if we own the req */
1423 if (ireq->pktopts) {
1424 newnp->pktoptions = skb_clone(ireq->pktopts,
1425 sk_gfp_mask(sk, GFP_ATOMIC));
1426 consume_skb(ireq->pktopts);
1427 ireq->pktopts = NULL;
1428 if (newnp->pktoptions) {
1429 tcp_v6_restore_cb(newnp->pktoptions);
1430 skb_set_owner_r(newnp->pktoptions, newsk);
1434 if (!req_unhash && found_dup_sk) {
1435 /* This code path should only be executed in the
1436 * syncookie case only
1438 bh_unlock_sock(newsk);
1447 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1455 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
/*
 * tcp_v6_do_rcv() - per-socket receive / backlog handler for IPv6 TCP.
 * NOTE(review): this is an elided listing; the error labels (reset:,
 * discard:, csum_error:, ipv6_pktoptions:) and several braces between
 * the numbered lines are not visible here.
 */
1457 /* The socket must have it's spinlock held when we get
1458 * here, unless it is a TCP_LISTEN socket.
1460 * We have a potential double-lock case here, so even when
1461 * doing backlog processing we use the BH locking scheme.
1462 * This is because we cannot sleep with the original spinlock
1465 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1467 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1468 struct sk_buff *opt_skb = NULL;
1469 struct tcp_sock *tp;
1471 /* Imagine: socket is IPv6. IPv4 packet arrives,
1472 goes to IPv4 receive handler and backlogged.
1473 From backlog it always goes here. Kerboom...
1474 Fortunately, tcp_rcv_established and rcv_established
1475 handle them correctly, but it is not case with
1476 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
/* v4-mapped case: an IPv4 frame on this socket is handed to the IPv4 path. */
1479 if (skb->protocol == htons(ETH_P_IP))
1480 return tcp_v4_do_rcv(sk, skb);
1483 * socket locking is here for SMP purposes as backlog rcv
1484 * is currently called with bh processing disabled.
1487 /* Do Stevens' IPV6_PKTOPTIONS.
1489 Yes, guys, it is the only place in our code, where we
1490 may make it not affecting IPv4.
1491 The rest of code is protocol independent,
1492 and I do not like idea to uglify IPv4.
1494 Actually, all the idea behind IPV6_PKTOPTIONS
1495 looks not very well thought. For now we latch
1496 options, received in the last packet, enqueued
1497 by tcp. Feel free to propose better solution.
/* Clone the skb so its IPv6 options can be latched after processing.
 * NOTE(review): the guard condition that decides when to clone
 * (presumably np->rxopt.all) is elided from this listing -- confirm. */
1501 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1503 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1504 struct dst_entry *dst = sk->sk_rx_dst;
1506 sock_rps_save_rxhash(sk, skb);
1507 sk_mark_napi_id(sk, skb);
/* Drop the cached rx route if the ingress device changed or the
 * cached dst fails revalidation against the stored cookie. */
1509 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1510 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
1511 dst, np->rx_dst_cookie) == NULL) {
1513 sk->sk_rx_dst = NULL;
1517 tcp_rcv_established(sk, skb);
1519 goto ipv6_pktoptions;
1523 if (tcp_checksum_complete(skb))
1526 if (sk->sk_state == TCP_LISTEN) {
1527 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1533 if (tcp_child_process(sk, nsk, skb))
1536 __kfree_skb(opt_skb);
1540 sock_rps_save_rxhash(sk, skb);
1542 if (tcp_rcv_state_process(sk, skb))
1545 goto ipv6_pktoptions;
/* (elided label, presumably reset:) send a RST in reply. */
1549 tcp_v6_send_reset(sk, skb);
1552 __kfree_skb(opt_skb);
1556 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1557 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1562 /* Do you ask, what is it?
1564 1. skb was enqueued by tcp.
1565 2. skb is added to tail of read queue, rather than out of order.
1566 3. socket is not in passive state.
1567 4. Finally, it really contains options, which user wants to receive.
1570 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1571 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1572 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1573 np->mcast_oif = tcp_v6_iif(opt_skb)
1574 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1575 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1576 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1577 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1579 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1580 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1581 skb_set_owner_r(opt_skb, sk);
1582 tcp_v6_restore_cb(opt_skb);
/* Atomically publish the new latched options; the old ones (returned
 * by xchg) are freed below. */
1583 opt_skb = xchg(&np->pktoptions, opt_skb);
1585 __kfree_skb(opt_skb);
1586 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_fill_cb() - populate TCP_SKB_CB() from the TCP/IPv6 headers,
 * first relocating the IP6CB() control-block data into its slot inside
 * TCP_SKB_CB(). Must run after xfrm6_policy_check() (see comment below).
 */
1594 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1595 const struct tcphdr *th)
1597 /* This is tricky: we move IP6CB at its correct location into
1598 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1599 * _decode_session6() uses IP6CB().
1600 * barrier() makes sure compiler won't play aliasing games.
1602 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1603 sizeof(struct inet6_skb_parm));
1606 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* end_seq counts SYN and FIN as one sequence unit each, plus payload. */
1607 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1608 skb->len - th->doff*4);
1609 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1610 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1611 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1612 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1613 TCP_SKB_CB(skb)->sacked = 0;
/* True if either a software or hardware receive timestamp is present. */
1614 TCP_SKB_CB(skb)->has_rxtstamp =
1615 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/*
 * tcp_v6_rcv() - main IPv6 TCP receive entry point (protocol handler).
 * Validates the header, looks up the owning socket, and dispatches on
 * socket state (NEW_SYN_RECV, LISTEN, TIME_WAIT, established).
 * NOTE(review): elided listing -- labels (lookup:, process:, no_tcp_socket:,
 * discard_it:, do_time_wait:, bad_packet:, ...) and some braces are missing.
 */
1618 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1620 struct sk_buff *skb_to_free;
1621 int sdif = inet6_sdif(skb);
1622 int dif = inet6_iif(skb);
1623 const struct tcphdr *th;
1624 const struct ipv6hdr *hdr;
1628 struct net *net = dev_net(skb->dev);
1630 if (skb->pkt_type != PACKET_HOST)
1634 * Count it even if it's bad.
1636 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Header sanity: pull the fixed header, then the full (doff*4) header. */
1638 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1641 th = (const struct tcphdr *)skb->data;
1643 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1645 if (!pskb_may_pull(skb, th->doff*4))
1648 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1651 th = (const struct tcphdr *)skb->data;
1652 hdr = ipv6_hdr(skb);
1655 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1656 th->source, th->dest, inet6_iif(skb), sdif,
1662 if (sk->sk_state == TCP_TIME_WAIT)
/* Mini-socket path: skb matched a request_sock awaiting the final ACK. */
1665 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1666 struct request_sock *req = inet_reqsk(sk);
1667 bool req_stolen = false;
1670 sk = req->rsk_listener;
1671 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
1672 sk_drops_add(sk, skb);
1676 if (tcp_checksum_complete(skb)) {
1680 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1681 inet_csk_reqsk_queue_drop_and_put(sk, req);
1687 if (!tcp_filter(sk, skb)) {
/* tcp_filter() may have trimmed the skb; reload header pointers. */
1688 th = (const struct tcphdr *)skb->data;
1689 hdr = ipv6_hdr(skb);
1690 tcp_v6_fill_cb(skb, hdr, th);
1691 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1696 /* Another cpu got exclusive access to req
1697 * and created a full blown socket.
1698 * Try to feed this packet to this socket
1699 * instead of discarding it.
1701 tcp_v6_restore_cb(skb);
1705 goto discard_and_relse;
1709 tcp_v6_restore_cb(skb);
1710 } else if (tcp_child_process(sk, nsk, skb)) {
1711 tcp_v6_send_reset(nsk, skb);
1712 goto discard_and_relse;
/* Per-socket minimum hop-limit filter (IP_MINTTL equivalent for v6). */
1718 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1719 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1720 goto discard_and_relse;
1723 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1724 goto discard_and_relse;
1726 if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
1727 goto discard_and_relse;
1729 if (tcp_filter(sk, skb))
1730 goto discard_and_relse;
1731 th = (const struct tcphdr *)skb->data;
1732 hdr = ipv6_hdr(skb);
1733 tcp_v6_fill_cb(skb, hdr, th);
1737 if (sk->sk_state == TCP_LISTEN) {
1738 ret = tcp_v6_do_rcv(sk, skb);
1739 goto put_and_return;
1742 sk_incoming_cpu_update(sk);
1744 bh_lock_sock_nested(sk);
1745 tcp_segs_in(tcp_sk(sk), skb);
/* Owned by us: process directly; otherwise queue to the backlog. */
1747 if (!sock_owned_by_user(sk)) {
1748 skb_to_free = sk->sk_rx_skb_cache;
1749 sk->sk_rx_skb_cache = NULL;
1750 ret = tcp_v6_do_rcv(sk, skb);
1752 if (tcp_add_backlog(sk, skb))
1753 goto discard_and_relse;
1758 __kfree_skb(skb_to_free);
1762 return ret ? -1 : 0;
/* (elided label, presumably no_tcp_socket:) no matching socket found. */
1765 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1768 tcp_v6_fill_cb(skb, hdr, th);
1770 if (tcp_checksum_complete(skb)) {
1772 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1774 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1776 tcp_v6_send_reset(NULL, skb);
1784 sk_drops_add(sk, skb);
/* (elided label, presumably do_time_wait:) TIME_WAIT handling. */
1790 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1791 inet_twsk_put(inet_twsk(sk));
1795 tcp_v6_fill_cb(skb, hdr, th);
1797 if (tcp_checksum_complete(skb)) {
1798 inet_twsk_put(inet_twsk(sk));
1802 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* TCP_TW_SYN: a new SYN may legitimately reuse the port; try to find a
 * listener and, if present, retire the timewait socket. */
1807 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1808 skb, __tcp_hdrlen(th),
1809 &ipv6_hdr(skb)->saddr, th->source,
1810 &ipv6_hdr(skb)->daddr,
1812 tcp_v6_iif_l3_slave(skb),
1815 struct inet_timewait_sock *tw = inet_twsk(sk);
1816 inet_twsk_deschedule_put(tw);
1818 tcp_v6_restore_cb(skb);
1826 tcp_v6_timewait_ack(sk, skb);
1829 tcp_v6_send_reset(sk, skb);
1830 inet_twsk_deschedule_put(inet_twsk(sk));
1832 case TCP_TW_SUCCESS:
/*
 * tcp_v6_early_demux() - look up an established socket before routing so
 * its cached rx dst can be attached to the skb (bypasses a route lookup).
 */
1838 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1840 const struct ipv6hdr *hdr;
1841 const struct tcphdr *th;
1844 if (skb->pkt_type != PACKET_HOST)
1847 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1850 hdr = ipv6_hdr(skb);
1853 if (th->doff < sizeof(struct tcphdr) / 4)
1856 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1857 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1858 &hdr->saddr, th->source,
1859 &hdr->daddr, ntohs(th->dest),
1860 inet6_iif(skb), inet6_sdif(skb));
1863 skb->destructor = sock_edemux;
1864 if (sk_fullsock(sk)) {
1865 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Revalidate the cached dst; use it only if it is still valid and was
 * learned on the same ingress interface. */
1868 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1870 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1871 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket operations for IPv6 TCP (size, uniqueness, destructor). */
1876 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1877 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1878 .twsk_unique = tcp_twsk_unique,
1879 .twsk_destructor = tcp_twsk_destructor,
/* Compute the TCP checksum for an outgoing skb using this socket's
 * IPv6 source and destination addresses (pseudo-header input). */
1882 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1884 struct ipv6_pinfo *np = inet6_sk(sk);
1886 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
/* Address-family operations for native IPv6 TCP sockets. */
1889 const struct inet_connection_sock_af_ops ipv6_specific = {
1890 .queue_xmit = inet6_csk_xmit,
1891 .send_check = tcp_v6_send_check,
1892 .rebuild_header = inet6_sk_rebuild_header,
1893 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1894 .conn_request = tcp_v6_conn_request,
1895 .syn_recv_sock = tcp_v6_syn_recv_sock,
1896 .net_header_len = sizeof(struct ipv6hdr),
1897 .net_frag_header_len = sizeof(struct frag_hdr),
1898 .setsockopt = ipv6_setsockopt,
1899 .getsockopt = ipv6_getsockopt,
1900 .addr2sockaddr = inet6_csk_addr2sockaddr,
1901 .sockaddr_len = sizeof(struct sockaddr_in6),
1902 .mtu_reduced = tcp_v6_mtu_reduced,
1905 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1906 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1907 .md5_lookup = tcp_v6_md5_lookup,
1908 .calc_md5_hash = tcp_v6_md5_hash_skb,
1909 .md5_parse = tcp_v6_parse_md5_keys,
1914 * TCP over IPv4 via INET6 API
/* Address-family operations used when an AF_INET6 socket carries an
 * IPv4 (v4-mapped) connection: transmit/header ops are the IPv4 ones,
 * sockopt/sockaddr handling stays IPv6. */
1916 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1917 .queue_xmit = ip_queue_xmit,
1918 .send_check = tcp_v4_send_check,
1919 .rebuild_header = inet_sk_rebuild_header,
1920 .sk_rx_dst_set = inet_sk_rx_dst_set,
1921 .conn_request = tcp_v6_conn_request,
1922 .syn_recv_sock = tcp_v6_syn_recv_sock,
1923 .net_header_len = sizeof(struct iphdr),
1924 .setsockopt = ipv6_setsockopt,
1925 .getsockopt = ipv6_getsockopt,
1926 .addr2sockaddr = inet6_csk_addr2sockaddr,
1927 .sockaddr_len = sizeof(struct sockaddr_in6),
1928 .mtu_reduced = tcp_v4_mtu_reduced,
1931 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for v4-mapped sockets: IPv4 hashing, IPv6 key parse. */
1932 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1933 .md5_lookup = tcp_v4_md5_lookup,
1934 .calc_md5_hash = tcp_v4_md5_hash_skb,
1935 .md5_parse = tcp_v6_parse_md5_keys,
1939 /* NOTE: A lot of things set to zero explicitly by call to
1940 * sk_alloc() so need not be done here.
/* Socket init hook for IPv6 TCP: install the IPv6 af_ops (and the IPv6
 * MD5 ops when configured). NOTE(review): the generic tcp init call is
 * elided from this listing. Always returns 0 in upstream -- confirm. */
1942 static int tcp_v6_init_sock(struct sock *sk)
1944 struct inet_connection_sock *icsk = inet_csk(sk);
1948 icsk->icsk_af_ops = &ipv6_specific;
1950 #ifdef CONFIG_TCP_MD5SIG
1951 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Destroy hook: run the shared TCP teardown, then the IPv6 teardown. */
1957 static void tcp_v6_destroy_sock(struct sock *sk)
1959 tcp_v4_destroy_sock(sk);
1960 inet6_destroy_sock(sk);
1963 #ifdef CONFIG_PROC_FS
1964 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one /proc/net/tcp6 line for a pending connection request
 * (SYN_RECV); many columns are fixed placeholders for request socks. */
1965 static void get_openreq6(struct seq_file *seq,
1966 const struct request_sock *req, int i)
/* Remaining time until the request's retransmit timer fires. */
1968 long ttd = req->rsk_timer.expires - jiffies;
1969 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1970 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1976 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1977 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1979 src->s6_addr32[0], src->s6_addr32[1],
1980 src->s6_addr32[2], src->s6_addr32[3],
1981 inet_rsk(req)->ir_num,
1982 dest->s6_addr32[0], dest->s6_addr32[1],
1983 dest->s6_addr32[2], dest->s6_addr32[3],
1984 ntohs(inet_rsk(req)->ir_rmt_port),
1986 0, 0, /* could print option size, but that is af dependent. */
1987 1, /* timers active (only the expire timer) */
1988 jiffies_to_clock_t(ttd),
1990 from_kuid_munged(seq_user_ns(seq),
1991 sock_i_uid(req->rsk_listener)),
1992 0, /* non standard timer */
1993 0, /* open_requests have no inode */
/* Emit one /proc/net/tcp6 line for a full socket: addresses, state,
 * queue sizes, active timer, and congestion/keepalive details. */
1997 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1999 const struct in6_addr *dest, *src;
2002 unsigned long timer_expires;
2003 const struct inet_sock *inet = inet_sk(sp);
2004 const struct tcp_sock *tp = tcp_sk(sp);
2005 const struct inet_connection_sock *icsk = inet_csk(sp);
2006 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2010 dest = &sp->sk_v6_daddr;
2011 src = &sp->sk_v6_rcv_saddr;
2012 destp = ntohs(inet->inet_dport);
2013 srcp = ntohs(inet->inet_sport);
/* Pick which pending timer to report (retransmit/probe/keepalive);
 * the corresponding timer-code assignments are elided in this listing. */
2015 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2016 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2017 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2019 timer_expires = icsk->icsk_timeout;
2020 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2022 timer_expires = icsk->icsk_timeout;
2023 } else if (timer_pending(&sp->sk_timer)) {
2025 timer_expires = sp->sk_timer.expires;
2028 timer_expires = jiffies;
2031 state = inet_sk_state_load(sp);
2032 if (state == TCP_LISTEN)
2033 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2035 /* Because we don't lock the socket,
2036 * we might find a transient negative value.
2038 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2039 READ_ONCE(tp->copied_seq), 0);
2042 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2043 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2045 src->s6_addr32[0], src->s6_addr32[1],
2046 src->s6_addr32[2], src->s6_addr32[3], srcp,
2047 dest->s6_addr32[0], dest->s6_addr32[1],
2048 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2050 READ_ONCE(tp->write_seq) - tp->snd_una,
2053 jiffies_delta_to_clock_t(timer_expires - jiffies),
2054 icsk->icsk_retransmits,
2055 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2056 icsk->icsk_probes_out,
2058 refcount_read(&sp->sk_refcnt), sp,
2059 jiffies_to_clock_t(icsk->icsk_rto),
2060 jiffies_to_clock_t(icsk->icsk_ack.ato),
2061 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
/* Listeners report the fastopen queue limit; others report ssthresh
 * (-1 while still in initial slow start). */
2063 state == TCP_LISTEN ?
2064 fastopenq->max_qlen :
2065 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Emit one /proc/net/tcp6 line for a TIME_WAIT socket; most columns are
 * zero placeholders since a twsk carries no queues or uid/inode. */
2069 static void get_timewait6_sock(struct seq_file *seq,
2070 struct inet_timewait_sock *tw, int i)
/* Remaining lifetime of the timewait timer. */
2072 long delta = tw->tw_timer.expires - jiffies;
2073 const struct in6_addr *dest, *src;
2076 dest = &tw->tw_v6_daddr;
2077 src = &tw->tw_v6_rcv_saddr;
2078 destp = ntohs(tw->tw_dport);
2079 srcp = ntohs(tw->tw_sport);
2082 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2083 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2085 src->s6_addr32[0], src->s6_addr32[1],
2086 src->s6_addr32[2], src->s6_addr32[3], srcp,
2087 dest->s6_addr32[0], dest->s6_addr32[1],
2088 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2089 tw->tw_substate, 0, 0,
2090 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2091 refcount_read(&tw->tw_refcnt), tw);
/* seq_file show callback for /proc/net/tcp6: print the header for the
 * start token, then dispatch on socket state to the right formatter. */
2094 static int tcp6_seq_show(struct seq_file *seq, void *v)
2096 struct tcp_iter_state *st;
2097 struct sock *sk = v;
2099 if (v == SEQ_START_TOKEN) {
2104 "st tx_queue rx_queue tr tm->when retrnsmt"
2105 " uid timeout inode\n");
2110 if (sk->sk_state == TCP_TIME_WAIT)
2111 get_timewait6_sock(seq, v, st->num);
2112 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2113 get_openreq6(seq, v, st->num);
2115 get_tcp6_sock(seq, v, st->num);
/* seq_file iterator ops for /proc/net/tcp6 (generic TCP iteration,
 * IPv6-specific show). */
2120 static const struct seq_operations tcp6_seq_ops = {
2121 .show = tcp6_seq_show,
2122 .start = tcp_seq_start,
2123 .next = tcp_seq_next,
2124 .stop = tcp_seq_stop,
/* Per-family iterator info; initializer fields are elided in this listing. */
2127 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Create the per-netns /proc/net/tcp6 entry.
 * NOTE(review): the failure return and success return lines are elided. */
2131 int __net_init tcp6_proc_init(struct net *net)
2133 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2134 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Remove the per-netns /proc/net/tcp6 entry. */
2139 void tcp6_proc_exit(struct net *net)
2141 remove_proc_entry("tcp6", net->proc_net);
/* struct proto for IPv6 TCP: binds the socket-layer operations (connect,
 * sendmsg, recvmsg, ...) to their TCP/IPv6 implementations and declares
 * memory accounting, slab sizing, and hash-table hookup. */
2145 struct proto tcpv6_prot = {
2147 .owner = THIS_MODULE,
2149 .pre_connect = tcp_v6_pre_connect,
2150 .connect = tcp_v6_connect,
2151 .disconnect = tcp_disconnect,
2152 .accept = inet_csk_accept,
2154 .init = tcp_v6_init_sock,
2155 .destroy = tcp_v6_destroy_sock,
2156 .shutdown = tcp_shutdown,
2157 .setsockopt = tcp_setsockopt,
2158 .getsockopt = tcp_getsockopt,
2159 .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
2160 .keepalive = tcp_set_keepalive,
2161 .recvmsg = tcp_recvmsg,
2162 .sendmsg = tcp_sendmsg,
2163 .sendpage = tcp_sendpage,
2164 .backlog_rcv = tcp_v6_do_rcv,
2165 .release_cb = tcp_release_cb,
2167 .unhash = inet_unhash,
2168 .get_port = inet_csk_get_port,
2169 #ifdef CONFIG_BPF_SYSCALL
2170 .psock_update_sk_prot = tcp_bpf_update_proto,
2172 .enter_memory_pressure = tcp_enter_memory_pressure,
2173 .leave_memory_pressure = tcp_leave_memory_pressure,
2174 .stream_memory_free = tcp_stream_memory_free,
2175 .sockets_allocated = &tcp_sockets_allocated,
2176 .memory_allocated = &tcp_memory_allocated,
2177 .memory_pressure = &tcp_memory_pressure,
2178 .orphan_count = &tcp_orphan_count,
2179 .sysctl_mem = sysctl_tcp_mem,
2180 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2181 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2182 .max_header = MAX_TCP_HEADER,
2183 .obj_size = sizeof(struct tcp6_sock),
/* RCU-safe slab reuse: lookups may race with socket recycling. */
2184 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2185 .twsk_prot = &tcp6_timewait_sock_ops,
2186 .rsk_prot = &tcp6_request_sock_ops,
2187 .h.hashinfo = &tcp_hashinfo,
2188 .no_autobind = true,
2189 .diag_destroy = tcp_abort,
2191 EXPORT_SYMBOL_GPL(tcpv6_prot);
2193 /* thinking of making this const? Don't.
2194 * early_demux can change based on sysctl.
/* inet6 protocol handler registration for IPPROTO_TCP. */
2196 static struct inet6_protocol tcpv6_protocol = {
2197 .early_demux = tcp_v6_early_demux,
2198 .early_demux_handler = tcp_v6_early_demux,
2199 .handler = tcp_v6_rcv,
2200 .err_handler = tcp_v6_err,
2201 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Protocol switch entry mapping SOCK_STREAM/IPPROTO_TCP on AF_INET6 to
 * tcpv6_prot and the inet6 stream socket ops. */
2204 static struct inet_protosw tcpv6_protosw = {
2205 .type = SOCK_STREAM,
2206 .protocol = IPPROTO_TCP,
2207 .prot = &tcpv6_prot,
2208 .ops = &inet6_stream_ops,
2209 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the raw control socket used for sending
 * stateless TCP replies (RST/ACK). */
2213 static int __net_init tcpv6_net_init(struct net *net)
2215 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2216 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: destroy the control socket created above. */
2219 static void __net_exit tcpv6_net_exit(struct net *net)
2221 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns exit: purge any remaining IPv6 TIME_WAIT sockets. */
2224 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2226 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle hooks for IPv6 TCP. */
2229 static struct pernet_operations tcpv6_net_ops = {
2230 .init = tcpv6_net_init,
2231 .exit = tcpv6_net_exit,
2232 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register the protocol handler, the protosw entry, the
 * pernet ops, and MPTCP-over-v6, unwinding in reverse order on failure.
 * NOTE(review): the error-check lines and the out_tcpv6_protosw label
 * line are elided in this listing. */
2235 int __init tcpv6_init(void)
2239 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2243 /* register inet6 protocol */
2244 ret = inet6_register_protosw(&tcpv6_protosw);
2246 goto out_tcpv6_protocol;
2248 ret = register_pernet_subsys(&tcpv6_net_ops);
2250 goto out_tcpv6_protosw;
2252 ret = mptcpv6_init();
2254 goto out_tcpv6_pernet_subsys;
2259 out_tcpv6_pernet_subsys:
2260 unregister_pernet_subsys(&tcpv6_net_ops);
2262 inet6_unregister_protosw(&tcpv6_protosw);
2264 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2268 void tcpv6_exit(void)
2270 unregister_pernet_subsys(&tcpv6_net_ops);
2271 inet6_unregister_protosw(&tcpv6_protosw);
2272 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);