3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
72 #include <trace/events/tcp.h>
74 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
75 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
76 struct request_sock *req);
78 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 static const struct inet_connection_sock_af_ops ipv6_specific;
82 #ifdef CONFIG_TCP_MD5SIG
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
86 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
87 const struct in6_addr *addr)
/* Cache the incoming skb's route on the socket so the established-state
 * fast path can validate and reuse it instead of doing a fresh route
 * lookup.  NOTE(review): lines are elided in this extract; the store of
 * dst into sk->sk_rx_dst is presumably among them — confirm in full source.
 */
93 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
95 struct dst_entry *dst = skb_dst(skb);
/* Only cache the dst if a reference can be taken safely. */
97 if (dst && dst_hold_safe(dst)) {
98 const struct rt6_info *rt = (const struct rt6_info *)dst;
/* Remember ingress ifindex and the route cookie; both are checked
 * later before trusting the cached dst (see tcp_v6_do_rcv). */
101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/* Derive the initial sequence number for a new connection from the
 * connection 4-tuple via a keyed secure hash.  daddr/saddr are taken from
 * the peer's packet, hence the apparent swap: the skb's destination is our
 * local address.  NOTE(review): the argument carrying tcp_hdr(skb)->dest
 * is elided from this extract — confirm against the full source.
 */
106 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
108 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
109 ipv6_hdr(skb)->saddr.s6_addr32,
111 tcp_hdr(skb)->source);
/* Compute the per-connection timestamp offset (randomized per address
 * pair) used to de-correlate TCP timestamps across connections. */
114 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
116 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
117 ipv6_hdr(skb)->saddr.s6_addr32);
/* Hook run before tcp_v6_connect(): gives a BPF cgroup program a chance
 * to inspect/rewrite the destination address.  Returns 0 or a -errno. */
120 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
123 /* This check is replicated from tcp_v6_connect() and intended to
124 * prevent BPF program called below from accessing bytes that are out
125 * of the bound specified by user in addr_len.
127 if (addr_len < SIN6_LEN_RFC2133)
/* Caller must hold the socket lock; assert that here. */
130 sock_owned_by_me(sk);
132 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/* Active-open (connect()) for an IPv6 TCP socket: validate the address,
 * handle flow labels, v4-mapped fallback, route the flow, bind a local
 * port/hash entry, pick the ISN and send the SYN.  Returns 0 or -errno.
 * NOTE(review): many lines are elided in this extract (error gotos,
 * closing braces, some assignments) — read alongside the full source.
 */
135 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
138 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
139 struct inet_sock *inet = inet_sk(sk);
140 struct inet_connection_sock *icsk = inet_csk(sk);
141 struct ipv6_pinfo *np = inet6_sk(sk);
142 struct tcp_sock *tp = tcp_sk(sk);
143 struct in6_addr *saddr = NULL, *final_p, final;
144 struct ipv6_txoptions *opt;
146 struct dst_entry *dst;
149 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
/* Reject short sockaddrs and wrong address families up front. */
151 if (addr_len < SIN6_LEN_RFC2133)
154 if (usin->sin6_family != AF_INET6)
155 return -EAFNOSUPPORT;
157 memset(&fl6, 0, sizeof(fl6));
/* Honor a caller-supplied flow label, looking up the socket's flow
 * label lease if one is set. */
160 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
161 IP6_ECN_flow_init(fl6.flowlabel);
162 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
163 struct ip6_flowlabel *flowlabel;
164 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
167 fl6_sock_release(flowlabel);
172 * connect() to INADDR_ANY means loopback (BSD'ism).
175 if (ipv6_addr_any(&usin->sin6_addr)) {
176 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
180 usin->sin6_addr = in6addr_loopback;
183 addr_type = ipv6_addr_type(&usin->sin6_addr);
185 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a consistent, bound interface. */
188 if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 if (addr_len >= sizeof(struct sockaddr_in6) &&
190 usin->sin6_scope_id) {
191 /* If interface is set while binding, indices
194 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
197 sk->sk_bound_dev_if = usin->sin6_scope_id;
200 /* Connect to link-local address requires an interface */
201 if (!sk->sk_bound_dev_if)
/* Reconnecting to a different peer: stale TS state must be reset. */
205 if (tp->rx_opt.ts_recent_stamp &&
206 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 tp->rx_opt.ts_recent = 0;
208 tp->rx_opt.ts_recent_stamp = 0;
209 WRITE_ONCE(tp->write_seq, 0);
212 sk->sk_v6_daddr = usin->sin6_addr;
213 np->flow_label = fl6.flowlabel;
/* V4-mapped destination: delegate to IPv4 TCP, swapping in the
 * mapped af_ops; restored below if tcp_v4_connect() fails. */
219 if (addr_type & IPV6_ADDR_MAPPED) {
220 u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 struct sockaddr_in sin;
223 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
225 if (__ipv6_only_sock(sk))
228 sin.sin_family = AF_INET;
229 sin.sin_port = usin->sin6_port;
230 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
232 icsk->icsk_af_ops = &ipv6_mapped;
233 sk->sk_backlog_rcv = tcp_v4_do_rcv;
234 #ifdef CONFIG_TCP_MD5SIG
235 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
238 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* Failure path: undo the mapped-mode switch. */
241 icsk->icsk_ext_hdr_len = exthdrlen;
242 icsk->icsk_af_ops = &ipv6_specific;
243 sk->sk_backlog_rcv = tcp_v6_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_specific;
249 np->saddr = sk->sk_v6_rcv_saddr;
254 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
255 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow key and route it; final_p carries a routing header
 * final destination if txoptions specify one. */
257 fl6.flowi6_proto = IPPROTO_TCP;
258 fl6.daddr = sk->sk_v6_daddr;
259 fl6.saddr = saddr ? *saddr : np->saddr;
260 fl6.flowi6_oif = sk->sk_bound_dev_if;
261 fl6.flowi6_mark = sk->sk_mark;
262 fl6.fl6_dport = usin->sin6_port;
263 fl6.fl6_sport = inet->inet_sport;
264 fl6.flowi6_uid = sk->sk_uid;
266 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
267 final_p = fl6_update_dst(&fl6, opt, &final);
269 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
271 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
279 sk->sk_v6_rcv_saddr = *saddr;
282 /* set the source address */
284 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
286 sk->sk_gso_type = SKB_GSO_TCPV6;
287 ip6_dst_store(sk, dst, NULL, NULL);
289 icsk->icsk_ext_hdr_len = 0;
291 icsk->icsk_ext_hdr_len = opt->opt_flen +
/* Clamp MSS so a full segment always fits in the IPv6 minimum MTU. */
294 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
296 inet->inet_dport = usin->sin6_port;
298 tcp_set_state(sk, TCP_SYN_SENT);
299 err = inet6_hash_connect(tcp_death_row, sk);
/* Unless repairing a checkpointed socket, pick a fresh secure ISN
 * and timestamp offset for this 4-tuple. */
305 if (likely(!tp->repair)) {
307 WRITE_ONCE(tp->write_seq,
308 secure_tcpv6_seq(np->saddr.s6_addr32,
309 sk->sk_v6_daddr.s6_addr32,
312 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
314 sk->sk_v6_daddr.s6_addr32);
317 if (tcp_fastopen_defer_connect(sk, &err))
322 err = tcp_connect(sk);
/* Common failure cleanup: return the socket to CLOSE. */
329 tcp_set_state(sk, TCP_CLOSE);
331 inet->inet_dport = 0;
332 sk->sk_route_caps = 0;
/* React to a deferred PMTU reduction (mtu_info was stashed by tcp_v6_err):
 * update the cached route's PMTU and shrink the MSS, retransmitting if
 * the new path MTU is smaller than what we were using. */
336 static void tcp_v6_mtu_reduced(struct sock *sk)
338 struct dst_entry *dst;
/* Nothing to do for listeners or closed sockets. */
341 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
344 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
346 /* Drop requests trying to increase our current mss.
347 * Check done in __ip6_rt_update_pmtu() is too late.
349 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
352 dst = inet6_csk_update_pmtu(sk, mtu);
356 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
357 tcp_sync_mss(sk, dst_mtu(dst));
358 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP: locate the socket matching the embedded
 * header, validate the error against the socket's window, then handle
 * redirects, PMTU updates and hard errors.  NOTE(review): lookup-failure
 * and locking lines are elided from this extract — see the full source.
 */
362 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
363 u8 type, u8 code, int offset, __be32 info)
365 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
366 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
367 struct net *net = dev_net(skb->dev);
368 struct request_sock *fastopen;
369 struct ipv6_pinfo *np;
/* Find the established (or request/timewait) socket for the inner
 * headers of the ICMP payload. */
376 sk = __inet6_lookup_established(net, &tcp_hashinfo,
377 &hdr->daddr, th->dest,
378 &hdr->saddr, ntohs(th->source),
379 skb->dev->ifindex, inet6_sdif(skb));
382 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
387 if (sk->sk_state == TCP_TIME_WAIT) {
388 inet_twsk_put(inet_twsk(sk));
391 seq = ntohl(th->seq);
392 fatal = icmpv6_err_convert(type, code, &err);
393 if (sk->sk_state == TCP_NEW_SYN_RECV)
394 return tcp_req_err(sk, seq, fatal);
/* If userspace holds the lock we can only act on PKT_TOOBIG (which
 * is deferred); everything else is counted as a locked drop. */
397 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
398 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
400 if (sk->sk_state == TCP_CLOSE)
/* min_hopcount defense: ignore errors arriving with a hop limit
 * below the configured minimum (anti-spoofing). */
403 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
404 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
409 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
410 fastopen = tp->fastopen_rsk;
411 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* The quoted sequence number must fall inside the send window,
 * otherwise the ICMP is stale or forged. */
412 if (sk->sk_state != TCP_LISTEN &&
413 !between(seq, snd_una, tp->snd_nxt)) {
414 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 if (type == NDISC_REDIRECT) {
421 if (!sock_owned_by_user(sk)) {
422 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
425 dst->ops->redirect(dst, sk, skb);
430 if (type == ICMPV6_PKT_TOOBIG) {
431 u32 mtu = ntohl(info);
433 /* We are not interested in TCP_LISTEN and open_requests
434 * (SYN-ACKs send out by Linux are always <576bytes so
435 * they should go through unfragmented).
437 if (sk->sk_state == TCP_LISTEN)
440 if (!ip6_sk_accept_pmtu(sk))
443 if (mtu < IPV6_MIN_MTU)
/* Stash the new MTU; apply now if unlocked, else defer to
 * tcp_v6_mtu_reduced() via the socket's deferred-event bits. */
446 WRITE_ONCE(tp->mtu_info, mtu);
448 if (!sock_owned_by_user(sk))
449 tcp_v6_mtu_reduced(sk);
450 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
457 /* Might be for an request_sock */
458 switch (sk->sk_state) {
461 /* Only in fast or simultaneous open. If a fast open socket is
462 * is already accepted it is treated as a connected one below.
464 if (fastopen && !fastopen->sk)
467 if (!sock_owned_by_user(sk)) {
469 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
473 sk->sk_err_soft = err;
/* Established sockets with recverr enabled get a hard error;
 * otherwise the error is recorded softly. */
477 if (!sock_owned_by_user(sk) && np->recverr) {
479 sk->sk_error_report(sk);
481 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending request socket.  Routes the
 * reply if no dst was supplied, constructs the segment, checksums it and
 * emits it via ip6_xmit().  Returns net_xmit_eval()'d status or -errno. */
489 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
491 struct request_sock *req,
492 struct tcp_fastopen_cookie *foc,
493 enum tcp_synack_type synack_type)
495 struct inet_request_sock *ireq = inet_rsk(req);
496 struct ipv6_pinfo *np = inet6_sk(sk);
497 struct ipv6_txoptions *opt;
498 struct flowi6 *fl6 = &fl->u.ip6;
502 /* First, grab a route. */
503 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
504 IPPROTO_TCP)) == NULL)
507 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
510 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
511 &ireq->ir_v6_rmt_addr);
513 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the client's flow label when IPV6_FLOWINFO reflection is on. */
514 if (np->repflow && ireq->pktopts)
515 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
/* Prefer per-request txoptions; fall back to the listener's. */
518 opt = ireq->ipv6_opt;
520 opt = rcu_dereference(np->opt);
521 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
524 err = net_xmit_eval(err);
/* Release per-request IPv6 state: cloned txoptions and the saved SYN skb. */
532 static void tcp_v6_reqsk_destructor(struct request_sock *req)
534 kfree(inet_rsk(req)->ipv6_opt);
534 kfree(inet_rsk(req)->ipv6_opt);
535 kfree_skb(inet_rsk(req)->pktopts);
538 #ifdef CONFIG_TCP_MD5SIG
/* Find the MD5 key configured on @sk for peer address @addr (AF_INET6). */
539 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
540 const struct in6_addr *addr)
542 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* af-ops wrapper: look up the MD5 key keyed by addr_sk's peer address. */
545 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
546 const struct sock *addr_sk)
548 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: copy the user's
 * tcp_md5sig, validate it, then add or delete the key.  V4-mapped peer
 * addresses are stored as AF_INET keys on the last 32 bits. */
551 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
552 char __user *optval, int optlen)
554 struct tcp_md5sig cmd;
555 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
558 if (optlen < sizeof(cmd))
561 if (copy_from_user(&cmd, optval, sizeof(cmd)))
564 if (sin6->sin6_family != AF_INET6)
/* TCP_MD5SIG_EXT may carry an explicit prefix length; otherwise the
 * key matches the exact address (128 bits, or 32 for v4-mapped). */
567 if (optname == TCP_MD5SIG_EXT &&
568 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
569 prefixlen = cmd.tcpm_prefixlen;
570 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
574 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
/* Zero key length means "delete the key". */
577 if (!cmd.tcpm_keylen) {
578 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
579 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
581 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
582 AF_INET6, prefixlen);
585 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
588 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
589 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
590 AF_INET, prefixlen, cmd.tcpm_key,
591 cmd.tcpm_keylen, GFP_KERNEL);
593 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
594 AF_INET6, prefixlen, cmd.tcpm_key,
595 cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the IPv6 pseudo-header plus a copy of the TCP header (checksum
 * field zeroed per RFC 2385) into the MD5 hash in a single scatterlist
 * update.  Returns crypto_ahash_update()'s status. */
598 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
599 const struct in6_addr *daddr,
600 const struct in6_addr *saddr,
601 const struct tcphdr *th, int nbytes)
603 struct tcp6_pseudohdr *bp;
604 struct scatterlist sg;
608 /* 1. TCP pseudo-header (RFC2460) */
611 bp->protocol = cpu_to_be32(IPPROTO_TCP);
612 bp->len = cpu_to_be32(nbytes);
/* Hash a scratch copy of the TCP header so the on-wire header (and
 * its checksum field) is not modified. */
614 _th = (struct tcphdr *)(bp + 1);
615 memcpy(_th, th, sizeof(*th));
618 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
619 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
620 sizeof(*bp) + sizeof(*th));
621 return crypto_ahash_update(hp->md5_req);
/* Compute the MD5 signature over pseudo-header + TCP header + key only
 * (no payload) — used for RST/ACK replies built by tcp_v6_send_response.
 * On any crypto failure the output digest is zeroed. */
624 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
625 const struct in6_addr *daddr, struct in6_addr *saddr,
626 const struct tcphdr *th)
628 struct tcp_md5sig_pool *hp;
629 struct ahash_request *req;
/* Per-CPU hash pool; must be released on every exit path. */
631 hp = tcp_get_md5sig_pool();
633 goto clear_hash_noput;
636 if (crypto_ahash_init(req))
638 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
640 if (tcp_md5_hash_key(hp, key))
642 ahash_request_set_crypt(req, NULL, md5_hash, 0);
643 if (crypto_ahash_final(req))
646 tcp_put_md5sig_pool();
/* Error path: drop the pool and zero the digest so callers never
 * consume a partial hash. */
650 tcp_put_md5sig_pool();
652 memset(md5_hash, 0, 16);
/* Compute the full MD5 signature (pseudo-header + TCP header + payload +
 * key) for an skb.  Addresses come from the socket when available (valid
 * for established/request sockets), else from the packet itself.
 * Digest is zeroed on failure, mirroring tcp_v6_md5_hash_hdr(). */
656 static int tcp_v6_md5_hash_skb(char *md5_hash,
657 const struct tcp_md5sig_key *key,
658 const struct sock *sk,
659 const struct sk_buff *skb)
661 const struct in6_addr *saddr, *daddr;
662 struct tcp_md5sig_pool *hp;
663 struct ahash_request *req;
664 const struct tcphdr *th = tcp_hdr(skb);
666 if (sk) { /* valid for establish/request sockets */
667 saddr = &sk->sk_v6_rcv_saddr;
668 daddr = &sk->sk_v6_daddr;
670 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
671 saddr = &ip6h->saddr;
672 daddr = &ip6h->daddr;
675 hp = tcp_get_md5sig_pool();
677 goto clear_hash_noput;
680 if (crypto_ahash_init(req))
683 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
/* Hash the payload starting past the TCP header (doff is in words). */
685 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
687 if (tcp_md5_hash_key(hp, key))
689 ahash_request_set_crypt(req, NULL, md5_hash, 0);
690 if (crypto_ahash_final(req))
693 tcp_put_md5sig_pool();
697 tcp_put_md5sig_pool();
699 memset(md5_hash, 0, 16);
/* Verify the MD5 option on an inbound segment against the key configured
 * for the peer.  Returns true when the packet must be dropped: key
 * expected but absent, unexpected option present, or digest mismatch. */
705 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
706 const struct sk_buff *skb)
708 #ifdef CONFIG_TCP_MD5SIG
709 const __u8 *hash_location = NULL;
710 struct tcp_md5sig_key *hash_expected;
711 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
712 const struct tcphdr *th = tcp_hdr(skb);
716 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
717 hash_location = tcp_parse_md5sig_option(th);
719 /* We've parsed the options - do we have a hash? */
720 if (!hash_expected && !hash_location)
723 if (hash_expected && !hash_location) {
724 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
728 if (!hash_expected && hash_location) {
729 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
733 /* check the signature */
734 genhash = tcp_v6_md5_hash_skb(newhash,
/* Mismatch (or hashing failure) is counted and logged rate-limited. */
738 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
739 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
740 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
741 genhash ? "failed" : "mismatch",
742 &ip6h->saddr, ntohs(th->source),
743 &ip6h->daddr, ntohs(th->dest));
/* Initialize IPv6-specific fields of a new request sock from the SYN:
 * addresses, ingress interface for link-local peers, and a reference to
 * the SYN skb when its IPv6 options must be preserved for the child. */
750 static void tcp_v6_init_req(struct request_sock *req,
751 const struct sock *sk_listener,
754 struct inet_request_sock *ireq = inet_rsk(req);
755 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
757 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
758 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
760 /* So that link locals have meaning */
761 if (!sk_listener->sk_bound_dev_if &&
762 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
763 ireq->ir_iif = tcp_v6_iif(skb);
/* Keep the SYN skb alive if the listener wants any of its ancillary
 * data (rx options) delivered on the accepted socket. */
765 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
766 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
767 np->rxopt.bits.rxinfo ||
768 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
769 np->rxopt.bits.rxohlim || np->repflow)) {
770 refcount_inc(&skb->users);
/* request_sock_ops hook: route a pending request via the IPv6 CSK helper. */
775 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
777 const struct request_sock *req)
779 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/* Generic request-sock operations for IPv6 TCP (SYN-ACK retransmit,
 * reset/ack senders, destructor, SYN-ACK timeout). */
782 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
784 .obj_size = sizeof(struct tcp6_request_sock),
785 .rtx_syn_ack = tcp_rtx_synack,
786 .send_ack = tcp_v6_reqsk_send_ack,
787 .destructor = tcp_v6_reqsk_destructor,
788 .send_reset = tcp_v6_send_reset,
789 .syn_ack_timeout = tcp_syn_ack_timeout,
/* TCP-specific af-ops for IPv6 request socks: MSS clamped to the IPv6
 * minimum MTU, MD5 hooks, syncookie support, routing, ISN/ts-off
 * generators and the SYN-ACK sender. */
792 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
793 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
794 sizeof(struct ipv6hdr),
795 #ifdef CONFIG_TCP_MD5SIG
796 .req_md5_lookup = tcp_v6_md5_lookup,
797 .calc_md5_hash = tcp_v6_md5_hash_skb,
799 .init_req = tcp_v6_init_req,
800 #ifdef CONFIG_SYN_COOKIES
801 .cookie_init_seq = cookie_v6_init_sequence,
803 .route_req = tcp_v6_route_req,
804 .init_seq = tcp_v6_init_seq,
805 .init_ts_off = tcp_v6_init_ts_off,
806 .send_synack = tcp_v6_send_synack,
/* Build and send a bare TCP control segment (RST when @rst, otherwise
 * ACK) in reply to @skb, using the per-netns control socket.  Optionally
 * carries timestamps and an MD5 signature.  NOTE(review): allocation-
 * failure and dst-error branches are elided from this extract. */
809 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
810 u32 ack, u32 win, u32 tsval, u32 tsecr,
811 int oif, struct tcp_md5sig_key *key, int rst,
812 u8 tclass, __be32 label)
814 const struct tcphdr *th = tcp_hdr(skb);
816 struct sk_buff *buff;
818 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
819 struct sock *ctl_sk = net->ipv6.tcp_sk;
820 unsigned int tot_len = sizeof(struct tcphdr);
821 struct dst_entry *dst;
/* Reserve option space for timestamps and/or MD5 as requested. */
826 tot_len += TCPOLEN_TSTAMP_ALIGNED;
827 #ifdef CONFIG_TCP_MD5SIG
829 tot_len += TCPOLEN_MD5SIG_ALIGNED;
832 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
837 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
839 t1 = skb_push(buff, tot_len);
840 skb_reset_transport_header(buff);
842 /* Swap the send and the receive. */
843 memset(t1, 0, sizeof(*t1));
844 t1->dest = th->source;
845 t1->source = th->dest;
846 t1->doff = tot_len / 4;
847 t1->seq = htonl(seq);
848 t1->ack_seq = htonl(ack);
/* A RST acking nothing carries no ACK bit; everything else does. */
849 t1->ack = !rst || !th->ack;
851 t1->window = htons(win);
/* Append TCP options (timestamp first, then MD5) after the header. */
853 topt = (__be32 *)(t1 + 1);
856 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
857 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
858 *topt++ = htonl(tsval);
859 *topt++ = htonl(tsecr);
862 #ifdef CONFIG_TCP_MD5SIG
864 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
865 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
866 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
867 &ipv6_hdr(skb)->saddr,
868 &ipv6_hdr(skb)->daddr, t1);
/* Reply flow: swap src/dst of the offending packet. */
872 memset(&fl6, 0, sizeof(fl6));
873 fl6.daddr = ipv6_hdr(skb)->saddr;
874 fl6.saddr = ipv6_hdr(skb)->daddr;
875 fl6.flowlabel = label;
877 buff->ip_summed = CHECKSUM_PARTIAL;
880 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
882 fl6.flowi6_proto = IPPROTO_TCP;
/* Strict (link-local) destinations must go out the ingress device. */
883 if (rt6_need_strict(&fl6.daddr) && !oif)
884 fl6.flowi6_oif = tcp_v6_iif(skb);
886 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
889 fl6.flowi6_oif = oif;
/* Preserve the original socket's fwmark (timewait or full sock). */
893 mark = (sk->sk_state == TCP_TIME_WAIT) ?
894 inet_twsk(sk)->tw_mark : sk->sk_mark;
895 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
896 fl6.fl6_dport = t1->dest;
897 fl6.fl6_sport = t1->source;
898 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
899 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
901 /* Pass a socket to ip6_dst_lookup either it is for RST
902 * Underlying function will use this to retrieve the network
905 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
907 skb_dst_set(buff, dst);
908 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
909 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
911 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/* Send a RST in reply to @skb.  If the segment carried an MD5 option but
 * we have no socket, search listeners for the matching key so the RST
 * can be signed; an unverifiable signature suppresses the RST entirely. */
918 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
920 const struct tcphdr *th = tcp_hdr(skb);
921 u32 seq = 0, ack_seq = 0;
922 struct tcp_md5sig_key *key = NULL;
923 #ifdef CONFIG_TCP_MD5SIG
924 const __u8 *hash_location = NULL;
925 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
926 unsigned char newhash[16];
928 struct sock *sk1 = NULL;
935 /* If sk not NULL, it means we did a successful lookup and incoming
936 * route had to be correct. prequeue might have dropped our dst.
938 if (!sk && !ipv6_unicast_destination(skb))
941 #ifdef CONFIG_TCP_MD5SIG
943 hash_location = tcp_parse_md5sig_option(th);
944 if (sk && sk_fullsock(sk)) {
945 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
946 } else if (hash_location) {
948 * active side is lost. Try to find listening socket through
949 * source port, and then find md5 key through listening socket.
950 * we are not loose security here:
951 * Incoming packet is checked with md5 hash with finding key,
952 * no RST generated if md5 hash doesn't match.
954 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
955 &tcp_hashinfo, NULL, 0,
957 th->source, &ipv6h->daddr,
959 tcp_v6_iif_l3_slave(skb),
964 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Re-hash the offending packet; bail (no RST) on mismatch. */
968 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
969 if (genhash || memcmp(hash_location, newhash, 16) != 0)
/* RFC 793: if the incoming segment had ACK, reset seq = its ack_seq;
 * otherwise ack everything it occupied. */
975 seq = ntohl(th->ack_seq);
977 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
981 oif = sk->sk_bound_dev_if;
983 trace_tcp_send_reset(sk, skb);
986 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
988 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper over tcp_v6_send_response() with rst=0: emit a bare ACK. */
994 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
995 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
996 struct tcp_md5sig_key *key, u8 tclass,
999 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/* ACK a segment arriving for a TIME-WAIT socket, using the state saved
 * in the timewait sock (next seq, window scaled down, timestamps, MD5
 * key, traffic class and flow label). */
1003 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1005 struct inet_timewait_sock *tw = inet_twsk(sk);
1006 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1008 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1009 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1010 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1011 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1012 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/* ACK on behalf of a request sock (SYN_RECV, regular or Fast Open),
 * applying the receive-window scaling mandated by RFC 7323. */
1017 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1018 struct request_sock *req)
1020 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1021 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1024 * The window field (SEG.WND) of every outgoing segment, with the
1025 * exception of <SYN> segments, MUST be right-shifted by
1026 * Rcv.Wind.Shift bits:
1028 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1029 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1030 tcp_rsk(req)->rcv_nxt,
1031 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1032 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1033 req->ts_recent, sk->sk_bound_dev_if,
1034 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
/* Validate a possible SYN-cookie ACK; returns the (possibly new child)
 * socket, or the listener unchanged when syncookies are compiled out. */
1039 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1041 #ifdef CONFIG_SYN_COOKIES
1042 const struct tcphdr *th = tcp_hdr(skb);
1045 sk = cookie_v6_check(sk, skb);
/* Handle an incoming SYN: delegate v4 packets to tcp_v4_conn_request,
 * refuse non-unicast or v4-mapped sources, else run the generic
 * connection-request machinery with the IPv6 ops tables. */
1050 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1052 if (skb->protocol == htons(ETH_P_IP))
1053 return tcp_v4_conn_request(sk, skb);
1055 if (!ipv6_unicast_destination(skb))
/* A v4-mapped source on the v6 path is malformed — count and drop. */
1058 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1059 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1063 return tcp_conn_request(&tcp6_request_sock_ops,
1064 &tcp_request_sock_ipv6_ops, sk, skb);
1068 return 0; /* don't send reset */
/* Restore IP6CB from the copy stashed in TCP_SKB_CB by tcp_v6_fill_cb(),
 * so the skb can traverse xfrm/fill_cb (or datagram ctl parsing) again. */
1071 static void tcp_v6_restore_cb(struct sk_buff *skb)
1073 /* We need to move header back to the beginning if xfrm6_policy_check()
1074 * and tcp_v6_fill_cb() are going to be called again.
1075 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1077 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1078 sizeof(struct inet6_skb_parm));
/* Create the child socket for a completed handshake.  Handles two cases:
 * (1) the SYN arrived over IPv4 (v4-mapped) — delegate to the v4 path and
 * then graft IPv6 state onto the child; (2) native IPv6 — route, clone
 * the listener, copy addresses/options, inherit MD5 keys and hash the
 * child.  NOTE(review): several error-path lines (acceptq overflow goto
 * targets, NULL checks) are elided from this extract.
 */
1081 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1082 struct request_sock *req,
1083 struct dst_entry *dst,
1084 struct request_sock *req_unhash,
1087 struct inet_request_sock *ireq;
1088 struct ipv6_pinfo *newnp;
1089 const struct ipv6_pinfo *np = inet6_sk(sk);
1090 struct ipv6_txoptions *opt;
1091 struct tcp6_sock *newtcp6sk;
1092 struct inet_sock *newinet;
1093 bool found_dup_sk = false;
1094 struct tcp_sock *newtp;
1096 #ifdef CONFIG_TCP_MD5SIG
1097 struct tcp_md5sig_key *key;
/* --- v4-mapped case: let IPv4 TCP build the child, then fix it up --- */
1101 if (skb->protocol == htons(ETH_P_IP)) {
1106 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1107 req_unhash, own_req);
1112 newtcp6sk = (struct tcp6_sock *)newsk;
1113 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1115 newinet = inet_sk(newsk);
1116 newnp = inet6_sk(newsk);
1117 newtp = tcp_sk(newsk);
1119 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1121 newnp->saddr = newsk->sk_v6_rcv_saddr;
/* The child speaks IPv4 on the wire: install the mapped ops. */
1123 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1124 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1125 #ifdef CONFIG_TCP_MD5SIG
1126 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
/* Clear list pointers copied from the listener — they must not be
 * shared with the parent. */
1129 newnp->ipv6_mc_list = NULL;
1130 newnp->ipv6_ac_list = NULL;
1131 newnp->ipv6_fl_list = NULL;
1132 newnp->pktoptions = NULL;
1134 newnp->mcast_oif = inet_iif(skb);
1135 newnp->mcast_hops = ip_hdr(skb)->ttl;
1136 newnp->rcv_flowinfo = 0;
1138 newnp->flow_label = 0;
1141 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1142 * here, tcp_create_openreq_child now does this for us, see the comment in
1143 * that function for the gory details. -acme
1146 /* It is tricky place. Until this moment IPv4 tcp
1147 worked with IPv6 icsk.icsk_af_ops.
1150 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- native IPv6 case --- */
1155 ireq = inet_rsk(req);
1157 if (sk_acceptq_is_full(sk))
1161 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1166 newsk = tcp_create_openreq_child(sk, req, skb);
1171 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1172 * count here, tcp_create_openreq_child now does this for us, see the
1173 * comment in that function for the gory details. -acme
1176 newsk->sk_gso_type = SKB_GSO_TCPV6;
1177 ip6_dst_store(newsk, dst, NULL, NULL);
1178 inet6_sk_rx_dst_set(newsk, skb);
1180 newtcp6sk = (struct tcp6_sock *)newsk;
1181 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1183 newtp = tcp_sk(newsk);
1184 newinet = inet_sk(newsk);
1185 newnp = inet6_sk(newsk);
1187 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1189 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1190 newnp->saddr = ireq->ir_v6_loc_addr;
1191 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1192 newsk->sk_bound_dev_if = ireq->ir_iif;
1194 /* Now IPv6 options...
1196 First: no IPv4 options.
1198 newinet->inet_opt = NULL;
1199 newnp->ipv6_mc_list = NULL;
1200 newnp->ipv6_ac_list = NULL;
1201 newnp->ipv6_fl_list = NULL;
1204 newnp->rxopt.all = np->rxopt.all;
1206 newnp->pktoptions = NULL;
1208 newnp->mcast_oif = tcp_v6_iif(skb);
1209 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1210 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1212 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1214 /* Clone native IPv6 options from listening socket (if any)
1216 Yes, keeping reference count would be much more clever,
1217 but we make one more one thing there: reattach optmem
1220 opt = ireq->ipv6_opt;
1222 opt = rcu_dereference(np->opt);
1224 opt = ipv6_dup_options(newsk, opt);
1225 RCU_INIT_POINTER(newnp->opt, opt);
1227 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1229 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1232 tcp_ca_openreq_child(newsk, dst);
1234 tcp_sync_mss(newsk, dst_mtu(dst));
1235 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1237 tcp_initialize_rcv_mss(newsk);
1239 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1240 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1242 #ifdef CONFIG_TCP_MD5SIG
1243 /* Copy over the MD5 key from the original socket */
1244 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1246 /* We're using one, so create a matching key
1247 * on the newsk structure. If we fail to get
1248 * memory, then we end up not copying the key
1251 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1252 AF_INET6, 128, key->key, key->keylen,
1253 sk_gfp_mask(sk, GFP_ATOMIC));
/* Inherit the listener's port binding and insert into ehash; on port
 * inheritance failure the child is force-closed. */
1257 if (__inet_inherit_port(sk, newsk) < 0) {
1258 inet_csk_prepare_forced_close(newsk);
1262 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
1265 tcp_move_syn(newtp, req);
1267 /* Clone pktoptions received with SYN, if we own the req */
1268 if (ireq->pktopts) {
1269 newnp->pktoptions = skb_clone(ireq->pktopts,
1270 sk_gfp_mask(sk, GFP_ATOMIC));
1271 consume_skb(ireq->pktopts);
1272 ireq->pktopts = NULL;
1273 if (newnp->pktoptions) {
1274 tcp_v6_restore_cb(newnp->pktoptions);
1275 skb_set_owner_r(newnp->pktoptions, newsk);
1279 if (!req_unhash && found_dup_sk) {
1280 /* This code path should only be executed in the
1281 * syncookie case only
1283 bh_unlock_sock(newsk);
1292 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1300 /* The socket must have it's spinlock held when we get
1301 * here, unless it is a TCP_LISTEN socket.
1303 * We have a potential double-lock case here, so even when
1304 * doing backlog processing we use the BH locking scheme.
1305 * This is because we cannot sleep with the original spinlock
1308 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1310 struct ipv6_pinfo *np = inet6_sk(sk);
1311 struct tcp_sock *tp;
1312 struct sk_buff *opt_skb = NULL;
1314 /* Imagine: socket is IPv6. IPv4 packet arrives,
1315 goes to IPv4 receive handler and backlogged.
1316 From backlog it always goes here. Kerboom...
1317 Fortunately, tcp_rcv_established and rcv_established
1318 handle them correctly, but it is not case with
1319 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1322 if (skb->protocol == htons(ETH_P_IP))
1323 return tcp_v4_do_rcv(sk, skb);
1326 * socket locking is here for SMP purposes as backlog rcv
1327 * is currently called with bh processing disabled.
1330 /* Do Stevens' IPV6_PKTOPTIONS.
1332 Yes, guys, it is the only place in our code, where we
1333 may make it not affecting IPv4.
1334 The rest of code is protocol independent,
1335 and I do not like idea to uglify IPv4.
1337 Actually, all the idea behind IPV6_PKTOPTIONS
1338 looks not very well thought. For now we latch
1339 options, received in the last packet, enqueued
1340 by tcp. Feel free to propose better solution.
/* Clone the skb up front so ancillary options can be latched after
 * processing (freed below if not wanted). */
1344 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
/* Established fast path: validate the cached rx dst before use. */
1346 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1347 struct dst_entry *dst = sk->sk_rx_dst;
1349 sock_rps_save_rxhash(sk, skb);
1350 sk_mark_napi_id(sk, skb);
1352 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1353 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1355 sk->sk_rx_dst = NULL;
1359 tcp_rcv_established(sk, skb);
1361 goto ipv6_pktoptions;
1365 if (tcp_checksum_complete(skb))
/* Listener: may hand off to a syncookie-validated child socket. */
1368 if (sk->sk_state == TCP_LISTEN) {
1369 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1375 if (tcp_child_process(sk, nsk, skb))
1378 __kfree_skb(opt_skb);
1382 sock_rps_save_rxhash(sk, skb);
1384 if (tcp_rcv_state_process(sk, skb))
1387 goto ipv6_pktoptions;
/* reset/discard paths (labels elided in this extract). */
1391 tcp_v6_send_reset(sk, skb);
1394 __kfree_skb(opt_skb);
1398 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1399 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1404 /* Do you ask, what is it?
1406 1. skb was enqueued by tcp.
1407 2. skb is added to tail of read queue, rather than out of order.
1408 3. socket is not in passive state.
1409 4. Finally, it really contains options, which user wants to receive.
1412 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1413 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1414 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1415 np->mcast_oif = tcp_v6_iif(opt_skb);
1416 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1417 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1418 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1419 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1421 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
/* Latch the clone as the socket's pktoptions, dropping any previous. */
1422 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1423 skb_set_owner_r(opt_skb, sk);
1424 tcp_v6_restore_cb(opt_skb);
1425 opt_skb = xchg(&np->pktoptions, opt_skb);
1427 __kfree_skb(opt_skb);
1428 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB() from the (already validated) IPv6 and TCP
 * headers, preserving the IP6CB() control-block contents first.
 */
1436 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1437 const struct tcphdr *th)
1439 /* This is tricky: we move IP6CB at its correct location into
1440 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1441 * _decode_session6() uses IP6CB().
1442 * barrier() makes sure compiler won't play aliasing games.
1444 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1445 sizeof(struct inet6_skb_parm));
1448 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
/* SYN and FIN each occupy one unit of sequence space, hence th->syn
 * and th->fin are added to the payload length.
 */
1449 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1450 skb->len - th->doff*4);
1451 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1452 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1453 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1454 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1455 TCP_SKB_CB(skb)->sacked = 0;
/* True if either a software or a hardware receive timestamp is set */
1456 TCP_SKB_CB(skb)->has_rxtstamp =
1457 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/* Main receive entry point for TCP over IPv6: validates the TCP
 * header, looks up the owning socket, and dispatches the segment to
 * the established / request-socket / time-wait handling paths.
 */
1460 static int tcp_v6_rcv(struct sk_buff *skb)
1462 int sdif = inet6_sdif(skb);
1463 const struct tcphdr *th;
1464 const struct ipv6hdr *hdr;
1468 struct net *net = dev_net(skb->dev);
/* Only process packets addressed to this host */
1470 if (skb->pkt_type != PACKET_HOST)
1474 * Count it even if it's bad.
1476 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Make sure the base TCP header is in the linear area */
1478 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1481 th = (const struct tcphdr *)skb->data;
/* Sanity-check the advertised header length, then pull options too */
1483 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1485 if (!pskb_may_pull(skb, th->doff*4))
/* Set up checksum state using the IPv6 pseudo-header */
1488 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-derive header pointers: pskb_may_pull() may have moved the data */
1491 th = (const struct tcphdr *)skb->data;
1492 hdr = ipv6_hdr(skb);
1495 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1496 th->source, th->dest, inet6_iif(skb), sdif,
1502 if (sk->sk_state == TCP_TIME_WAIT)
/* Request socket: 3WHS completing; process against the listener */
1505 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1506 struct request_sock *req = inet_reqsk(sk);
1507 bool req_stolen = false;
1510 sk = req->rsk_listener;
1511 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1512 sk_drops_add(sk, skb);
1516 if (tcp_checksum_complete(skb)) {
/* Listener is gone (e.g. closed): drop the pending request */
1520 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1521 inet_csk_reqsk_queue_drop_and_put(sk, req);
1527 if (!tcp_filter(sk, skb)) {
1528 th = (const struct tcphdr *)skb->data;
1529 hdr = ipv6_hdr(skb);
1530 tcp_v6_fill_cb(skb, hdr, th);
1531 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1536 /* Another cpu got exclusive access to req
1537 * and created a full blown socket.
1538 * Try to feed this packet to this socket
1539 * instead of discarding it.
1541 tcp_v6_restore_cb(skb);
1545 goto discard_and_relse;
1549 tcp_v6_restore_cb(skb);
1550 } else if (tcp_child_process(sk, nsk, skb)) {
1551 tcp_v6_send_reset(nsk, skb);
1552 goto discard_and_relse;
/* Drop segments whose hop limit is below the socket's configured
 * minimum (IPV6_MINHOPCOUNT, a GTSM-style spoofing defense).
 */
1558 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1559 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1560 goto discard_and_relse;
1563 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1564 goto discard_and_relse;
1566 if (tcp_v6_inbound_md5_hash(sk, skb))
1567 goto discard_and_relse;
1569 if (tcp_filter(sk, skb))
1570 goto discard_and_relse;
/* tcp_filter() may have trimmed the skb; refresh header pointers */
1571 th = (const struct tcphdr *)skb->data;
1572 hdr = ipv6_hdr(skb);
1573 tcp_v6_fill_cb(skb, hdr, th);
1577 if (sk->sk_state == TCP_LISTEN) {
1578 ret = tcp_v6_do_rcv(sk, skb);
1579 goto put_and_return;
1582 sk_incoming_cpu_update(sk);
/* Process directly unless user context owns the socket, in which
 * case the segment is queued onto the backlog.
 */
1584 bh_lock_sock_nested(sk);
1585 tcp_segs_in(tcp_sk(sk), skb);
1587 if (!sock_owned_by_user(sk)) {
1588 ret = tcp_v6_do_rcv(sk, skb);
1589 } else if (tcp_add_backlog(sk, skb)) {
1590 goto discard_and_relse;
1597 return ret ? -1 : 0;
/* No matching socket: validate, count errors, and answer with RST */
1600 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1603 tcp_v6_fill_cb(skb, hdr, th);
1605 if (tcp_checksum_complete(skb)) {
1607 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1609 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1611 tcp_v6_send_reset(NULL, skb);
1619 sk_drops_add(sk, skb);
/* TIME-WAIT handling: decide between SYN reuse, ACK, or RST */
1625 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1626 inet_twsk_put(inet_twsk(sk));
1630 tcp_v6_fill_cb(skb, hdr, th);
1632 if (tcp_checksum_complete(skb)) {
1633 inet_twsk_put(inet_twsk(sk));
1637 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* A new SYN may legitimately reuse this 4-tuple: look for a listener */
1642 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1643 skb, __tcp_hdrlen(th),
1644 &ipv6_hdr(skb)->saddr, th->source,
1645 &ipv6_hdr(skb)->daddr,
1647 tcp_v6_iif_l3_slave(skb),
1650 struct inet_timewait_sock *tw = inet_twsk(sk);
1651 inet_twsk_deschedule_put(tw);
1653 tcp_v6_restore_cb(skb);
1661 tcp_v6_timewait_ack(sk, skb);
1664 tcp_v6_send_reset(sk, skb);
1665 inet_twsk_deschedule_put(inet_twsk(sk));
/* fall through */
1667 case TCP_TW_SUCCESS:
/* Early demultiplexing: look up an established socket before routing
 * so its cached rx dst can be attached to the skb, skipping a full
 * route lookup on the hot receive path.
 */
1673 static void tcp_v6_early_demux(struct sk_buff *skb)
1675 const struct ipv6hdr *hdr;
1676 const struct tcphdr *th;
1679 if (skb->pkt_type != PACKET_HOST)
1682 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1685 hdr = ipv6_hdr(skb);
/* Reject bogus (too-small) TCP header length */
1688 if (th->doff < sizeof(struct tcphdr) / 4)
1691 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1692 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1693 &hdr->saddr, th->source,
1694 &hdr->daddr, ntohs(th->dest),
1695 inet6_iif(skb), inet6_sdif(skb));
1698 skb->destructor = sock_edemux;
1699 if (sk_fullsock(sk)) {
1700 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Validate the cached dst via its route cookie before use */
1703 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
/* Only attach it if it is still valid for the ingress device */
1705 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1706 skb_dst_set_noref(skb, dst);
/* TIME-WAIT socket operations for IPv6 TCP (sizing, uniqueness check,
 * and destructor shared with the IPv4 implementation).
 */
1711 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1712 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1713 .twsk_unique = tcp_twsk_unique,
1714 .twsk_destructor = tcp_twsk_destructor,
/* Address-family operations for native IPv6 TCP sockets */
1717 static const struct inet_connection_sock_af_ops ipv6_specific = {
1718 .queue_xmit = inet6_csk_xmit,
1719 .send_check = tcp_v6_send_check,
1720 .rebuild_header = inet6_sk_rebuild_header,
1721 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1722 .conn_request = tcp_v6_conn_request,
1723 .syn_recv_sock = tcp_v6_syn_recv_sock,
1724 .net_header_len = sizeof(struct ipv6hdr),
1725 .net_frag_header_len = sizeof(struct frag_hdr),
1726 .setsockopt = ipv6_setsockopt,
1727 .getsockopt = ipv6_getsockopt,
1728 .addr2sockaddr = inet6_csk_addr2sockaddr,
1729 .sockaddr_len = sizeof(struct sockaddr_in6),
1730 #ifdef CONFIG_COMPAT
1731 .compat_setsockopt = compat_ipv6_setsockopt,
1732 .compat_getsockopt = compat_ipv6_getsockopt,
1734 .mtu_reduced = tcp_v6_mtu_reduced,
1737 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 signature (RFC 2385) operations for native IPv6 sockets */
1738 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1739 .md5_lookup = tcp_v6_md5_lookup,
1740 .calc_md5_hash = tcp_v6_md5_hash_skb,
1741 .md5_parse = tcp_v6_parse_md5_keys,
1746 * TCP over IPv4 via INET6 API
/* Address-family operations installed when an AF_INET6 socket talks
 * to an IPv4-mapped peer: transmit, checksum and header handling go
 * through the IPv4 paths, while sockopt/sockaddr stay IPv6-shaped.
 */
1748 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1749 .queue_xmit = ip_queue_xmit,
1750 .send_check = tcp_v4_send_check,
1751 .rebuild_header = inet_sk_rebuild_header,
1752 .sk_rx_dst_set = inet_sk_rx_dst_set,
1753 .conn_request = tcp_v6_conn_request,
1754 .syn_recv_sock = tcp_v6_syn_recv_sock,
1755 .net_header_len = sizeof(struct iphdr),
1756 .setsockopt = ipv6_setsockopt,
1757 .getsockopt = ipv6_getsockopt,
1758 .addr2sockaddr = inet6_csk_addr2sockaddr,
1759 .sockaddr_len = sizeof(struct sockaddr_in6),
1760 #ifdef CONFIG_COMPAT
1761 .compat_setsockopt = compat_ipv6_setsockopt,
1762 .compat_getsockopt = compat_ipv6_getsockopt,
1764 .mtu_reduced = tcp_v4_mtu_reduced,
1767 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for IPv4-mapped sockets: hashing uses the IPv4
 * helpers, key parsing stays with the IPv6 sockopt parser.
 */
1768 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1769 .md5_lookup = tcp_v4_md5_lookup,
1770 .calc_md5_hash = tcp_v4_md5_hash_skb,
1771 .md5_parse = tcp_v6_parse_md5_keys,
1775 /* NOTE: A lot of things set to zero explicitly by call to
1776 * sk_alloc() so need not be done here.
/* Per-socket init for AF_INET6 TCP: install the IPv6 af_ops (and the
 * MD5 ops when CONFIG_TCP_MD5SIG is enabled).
 */
1778 static int tcp_v6_init_sock(struct sock *sk)
1780 struct inet_connection_sock *icsk = inet_csk(sk);
1784 icsk->icsk_af_ops = &ipv6_specific;
1786 #ifdef CONFIG_TCP_MD5SIG
1787 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Socket teardown: run the common (IPv4-shared) TCP destruction, then
 * release IPv6-specific state.
 */
1793 static void tcp_v6_destroy_sock(struct sock *sk)
1795 tcp_v4_destroy_sock(sk);
1796 inet6_destroy_sock(sk);
1799 #ifdef CONFIG_PROC_FS
1800 /* Proc filesystem TCPv6 sock list dumping. */
/* Emit one SYN_RECV request socket as a /proc/net/tcp6 row */
1801 static void get_openreq6(struct seq_file *seq,
1802 const struct request_sock *req, int i)
/* Time until the SYN-ACK retransmit timer fires, in jiffies */
1804 long ttd = req->rsk_timer.expires - jiffies;
1805 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1806 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1812 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1813 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1815 src->s6_addr32[0], src->s6_addr32[1],
1816 src->s6_addr32[2], src->s6_addr32[3],
1817 inet_rsk(req)->ir_num,
1818 dest->s6_addr32[0], dest->s6_addr32[1],
1819 dest->s6_addr32[2], dest->s6_addr32[3],
1820 ntohs(inet_rsk(req)->ir_rmt_port),
1822 0, 0, /* could print option size, but that is af dependent. */
1823 1, /* timers active (only the expire timer) */
1824 jiffies_to_clock_t(ttd),
/* UID is translated into the reader's user namespace */
1826 from_kuid_munged(seq_user_ns(seq),
1827 sock_i_uid(req->rsk_listener)),
1828 0, /* non standard timer */
1829 0, /* open_requests have no inode */
/* Emit one full TCP socket as a /proc/net/tcp6 row */
1833 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1835 const struct in6_addr *dest, *src;
1838 unsigned long timer_expires;
1839 const struct inet_sock *inet = inet_sk(sp);
1840 const struct tcp_sock *tp = tcp_sk(sp);
1841 const struct inet_connection_sock *icsk = inet_csk(sp);
1842 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1846 dest = &sp->sk_v6_daddr;
1847 src = &sp->sk_v6_rcv_saddr;
1848 destp = ntohs(inet->inet_dport);
1849 srcp = ntohs(inet->inet_sport);
/* Pick the most relevant pending timer to report: retransmit-class
 * timers first, then zero-window probe, then the keepalive timer.
 */
1851 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1852 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1853 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1855 timer_expires = icsk->icsk_timeout;
1856 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1858 timer_expires = icsk->icsk_timeout;
1859 } else if (timer_pending(&sp->sk_timer)) {
1861 timer_expires = sp->sk_timer.expires;
1864 timer_expires = jiffies;
1867 state = inet_sk_state_load(sp);
/* For listeners the "rx queue" column is the accept backlog */
1868 if (state == TCP_LISTEN)
1869 rx_queue = sp->sk_ack_backlog;
1871 /* Because we don't lock the socket,
1872 * we might find a transient negative value.
1874 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1875 READ_ONCE(tp->copied_seq), 0);
1878 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1879 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1881 src->s6_addr32[0], src->s6_addr32[1],
1882 src->s6_addr32[2], src->s6_addr32[3], srcp,
1883 dest->s6_addr32[0], dest->s6_addr32[1],
1884 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1886 READ_ONCE(tp->write_seq) - tp->snd_una,
1889 jiffies_delta_to_clock_t(timer_expires - jiffies),
1890 icsk->icsk_retransmits,
1891 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1892 icsk->icsk_probes_out,
1894 refcount_read(&sp->sk_refcnt), sp,
1895 jiffies_to_clock_t(icsk->icsk_rto),
1896 jiffies_to_clock_t(icsk->icsk_ack.ato),
1897 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* Last column: TFO max queue length for listeners, otherwise the
 * slow-start threshold (-1 while still in initial slow start).
 */
1899 state == TCP_LISTEN ?
1900 fastopenq->max_qlen :
1901 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Emit one TIME-WAIT socket as a /proc/net/tcp6 row */
1905 static void get_timewait6_sock(struct seq_file *seq,
1906 struct inet_timewait_sock *tw, int i)
/* Remaining time until the timewait timer fires, in jiffies */
1908 long delta = tw->tw_timer.expires - jiffies;
1909 const struct in6_addr *dest, *src;
1912 dest = &tw->tw_v6_daddr;
1913 src = &tw->tw_v6_rcv_saddr;
1914 destp = ntohs(tw->tw_dport);
1915 srcp = ntohs(tw->tw_sport);
1918 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1919 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1921 src->s6_addr32[0], src->s6_addr32[1],
1922 src->s6_addr32[2], src->s6_addr32[3], srcp,
1923 dest->s6_addr32[0], dest->s6_addr32[1],
1924 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1925 tw->tw_substate, 0, 0,
1926 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1927 refcount_read(&tw->tw_refcnt), tw);
/* seq_file ->show callback: print the header line for the first call,
 * then dispatch on socket state to the matching row formatter.
 */
1930 static int tcp6_seq_show(struct seq_file *seq, void *v)
1932 struct tcp_iter_state *st;
1933 struct sock *sk = v;
1935 if (v == SEQ_START_TOKEN) {
1940 "st tx_queue rx_queue tr tm->when retrnsmt"
1941 " uid timeout inode\n");
1946 if (sk->sk_state == TCP_TIME_WAIT)
1947 get_timewait6_sock(seq, v, st->num);
1948 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1949 get_openreq6(seq, v, st->num);
1951 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration callbacks for /proc/net/tcp6 (iteration helpers
 * are shared with the IPv4 implementation).
 */
1956 static const struct seq_operations tcp6_seq_ops = {
1957 .show = tcp6_seq_show,
1958 .start = tcp_seq_start,
1959 .next = tcp_seq_next,
1960 .stop = tcp_seq_stop,
/* Per-AF data attached to the /proc/net/tcp6 entry */
1963 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Create the per-netns /proc/net/tcp6 entry (0444 = world-readable) */
1967 int __net_init tcp6_proc_init(struct net *net)
1969 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1970 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Remove the per-netns /proc/net/tcp6 entry */
1975 void tcp6_proc_exit(struct net *net)
1977 remove_proc_entry("tcp6", net->proc_net);
/* struct proto for TCP over IPv6: most operations are the protocol-
 * independent TCP helpers; only connect/init/destroy/backlog_rcv are
 * IPv6-specific. Hashing, memory accounting and sysctl knobs are
 * shared with IPv4 TCP.
 */
1981 struct proto tcpv6_prot = {
1983 .owner = THIS_MODULE,
1985 .pre_connect = tcp_v6_pre_connect,
1986 .connect = tcp_v6_connect,
1987 .disconnect = tcp_disconnect,
1988 .accept = inet_csk_accept,
1990 .init = tcp_v6_init_sock,
1991 .destroy = tcp_v6_destroy_sock,
1992 .shutdown = tcp_shutdown,
1993 .setsockopt = tcp_setsockopt,
1994 .getsockopt = tcp_getsockopt,
1995 .keepalive = tcp_set_keepalive,
1996 .recvmsg = tcp_recvmsg,
1997 .sendmsg = tcp_sendmsg,
1998 .sendpage = tcp_sendpage,
1999 .backlog_rcv = tcp_v6_do_rcv,
2000 .release_cb = tcp_release_cb,
2002 .unhash = inet_unhash,
2003 .get_port = inet_csk_get_port,
2004 .enter_memory_pressure = tcp_enter_memory_pressure,
2005 .leave_memory_pressure = tcp_leave_memory_pressure,
2006 .stream_memory_free = tcp_stream_memory_free,
2007 .sockets_allocated = &tcp_sockets_allocated,
2008 .memory_allocated = &tcp_memory_allocated,
2009 .memory_pressure = &tcp_memory_pressure,
2010 .orphan_count = &tcp_orphan_count,
2011 .sysctl_mem = sysctl_tcp_mem,
2012 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2013 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2014 .max_header = MAX_TCP_HEADER,
2015 .obj_size = sizeof(struct tcp6_sock),
2016 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2017 .twsk_prot = &tcp6_timewait_sock_ops,
2018 .rsk_prot = &tcp6_request_sock_ops,
2019 .h.hashinfo = &tcp_hashinfo,
2020 .no_autobind = true,
2021 #ifdef CONFIG_COMPAT
2022 .compat_setsockopt = compat_tcp_setsockopt,
2023 .compat_getsockopt = compat_tcp_getsockopt,
2025 .diag_destroy = tcp_abort,
2028 /* thinking of making this const? Don't.
2029 * early_demux can change based on sysctl.
/* inet6 protocol handler hooking TCP into the IPv6 receive path */
2031 static struct inet6_protocol tcpv6_protocol = {
2032 .early_demux = tcp_v6_early_demux,
2033 .early_demux_handler = tcp_v6_early_demux,
2034 .handler = tcp_v6_rcv,
2035 .err_handler = tcp_v6_err,
2036 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-layer registration: maps SOCK_STREAM/IPPROTO_TCP on AF_INET6
 * to tcpv6_prot and the generic inet6 stream ops.
 */
2039 static struct inet_protosw tcpv6_protosw = {
2040 .type = SOCK_STREAM,
2041 .protocol = IPPROTO_TCP,
2042 .prot = &tcpv6_prot,
2043 .ops = &inet6_stream_ops,
2044 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the kernel control socket used for sending
 * RSTs and ACKs on behalf of this namespace.
 */
2048 static int __net_init tcpv6_net_init(struct net *net)
2050 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2051 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns teardown: destroy the namespace's control socket */
2054 static void __net_exit tcpv6_net_exit(struct net *net)
2056 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns exit: purge all IPv6 TIME-WAIT sockets in one pass */
2059 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2061 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle hooks for IPv6 TCP */
2064 static struct pernet_operations tcpv6_net_ops = {
2065 .init = tcpv6_net_init,
2066 .exit = tcpv6_net_exit,
2067 .exit_batch = tcpv6_net_exit_batch,
/* Subsystem init: register the inet6 protocol handler, the socket
 * interface, and the pernet operations; on failure, unwind the
 * registrations in reverse order.
 */
2070 int __init tcpv6_init(void)
2074 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2078 /* register inet6 protocol */
2079 ret = inet6_register_protosw(&tcpv6_protosw);
2081 goto out_tcpv6_protocol;
2083 ret = register_pernet_subsys(&tcpv6_net_ops);
2085 goto out_tcpv6_protosw;
/* error unwinding: undo earlier registrations in reverse order */
2090 inet6_unregister_protosw(&tcpv6_protosw);
2092 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Subsystem teardown: unregister everything tcpv6_init() registered,
 * in reverse order.
 */
2096 void tcpv6_exit(void)
2098 unregister_pernet_subsys(&tcpv6_net_ops);
2099 inet6_unregister_protosw(&tcpv6_protosw);
2100 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);