3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
72 #include <trace/events/tcp.h>
74 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
75 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
76 struct request_sock *req);
78 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
80 static const struct inet_connection_sock_af_ops ipv6_mapped;
81 static const struct inet_connection_sock_af_ops ipv6_specific;
82 #ifdef CONFIG_TCP_MD5SIG
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
86 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
87 const struct in6_addr *addr)
93 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
95 struct dst_entry *dst = skb_dst(skb);
97 if (dst && dst_hold_safe(dst)) {
98 const struct rt6_info *rt = (const struct rt6_info *)dst;
101 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
102 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
106 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
108 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
109 ipv6_hdr(skb)->saddr.s6_addr32,
111 tcp_hdr(skb)->source);
114 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
116 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
117 ipv6_hdr(skb)->saddr.s6_addr32);
120 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
123 /* This check is replicated from tcp_v6_connect() and intended to
124 * prevent BPF program called below from accessing bytes that are out
125 * of the bound specified by user in addr_len.
127 if (addr_len < SIN6_LEN_RFC2133)
130 sock_owned_by_me(sk);
132 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
/*
 * tcp_v6_connect(): active-open (connect()) path for IPv6 TCP, including
 * the v4-mapped fallback to tcp_v4_connect().
 *
 * NOTE(review): this block is garbled by extraction -- each line carries a
 * fused original line number and many intermediate lines are missing (see
 * the gaps in those numbers, e.g. 144 -> 146 -> 149).  It cannot compile
 * as-is; restore the full function from revision history before editing.
 */
135 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
138 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
139 struct inet_sock *inet = inet_sk(sk);
140 struct inet_connection_sock *icsk = inet_csk(sk);
141 struct ipv6_pinfo *np = inet6_sk(sk);
142 struct tcp_sock *tp = tcp_sk(sk);
143 struct in6_addr *saddr = NULL, *final_p, final;
144 struct ipv6_txoptions *opt;
146 struct dst_entry *dst;
149 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
151 if (addr_len < SIN6_LEN_RFC2133)
154 if (usin->sin6_family != AF_INET6)
155 return -EAFNOSUPPORT;
157 memset(&fl6, 0, sizeof(fl6));
160 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
161 IP6_ECN_flow_init(fl6.flowlabel);
162 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
163 struct ip6_flowlabel *flowlabel;
164 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
167 fl6_sock_release(flowlabel);
172 * connect() to INADDR_ANY means loopback (BSD'ism).
175 if (ipv6_addr_any(&usin->sin6_addr)) {
176 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
177 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
180 usin->sin6_addr = in6addr_loopback;
183 addr_type = ipv6_addr_type(&usin->sin6_addr);
185 if (addr_type & IPV6_ADDR_MULTICAST)
188 if (addr_type&IPV6_ADDR_LINKLOCAL) {
189 if (addr_len >= sizeof(struct sockaddr_in6) &&
190 usin->sin6_scope_id) {
191 /* If interface is set while binding, indices
194 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
197 sk->sk_bound_dev_if = usin->sin6_scope_id;
200 /* Connect to link-local address requires an interface */
201 if (!sk->sk_bound_dev_if)
205 if (tp->rx_opt.ts_recent_stamp &&
206 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
207 tp->rx_opt.ts_recent = 0;
208 tp->rx_opt.ts_recent_stamp = 0;
209 WRITE_ONCE(tp->write_seq, 0);
212 sk->sk_v6_daddr = usin->sin6_addr;
213 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: fall back to the IPv4 connect path. */
219 if (addr_type & IPV6_ADDR_MAPPED) {
220 u32 exthdrlen = icsk->icsk_ext_hdr_len;
221 struct sockaddr_in sin;
223 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
225 if (__ipv6_only_sock(sk))
228 sin.sin_family = AF_INET;
229 sin.sin_port = usin->sin6_port;
230 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
232 icsk->icsk_af_ops = &ipv6_mapped;
233 sk->sk_backlog_rcv = tcp_v4_do_rcv;
234 #ifdef CONFIG_TCP_MD5SIG
235 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
238 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
241 icsk->icsk_ext_hdr_len = exthdrlen;
242 icsk->icsk_af_ops = &ipv6_specific;
243 sk->sk_backlog_rcv = tcp_v6_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_specific;
249 np->saddr = sk->sk_v6_rcv_saddr;
254 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
255 saddr = &sk->sk_v6_rcv_saddr;
/* Build the flow key and route the connection. */
257 fl6.flowi6_proto = IPPROTO_TCP;
258 fl6.daddr = sk->sk_v6_daddr;
259 fl6.saddr = saddr ? *saddr : np->saddr;
260 fl6.flowi6_oif = sk->sk_bound_dev_if;
261 fl6.flowi6_mark = sk->sk_mark;
262 fl6.fl6_dport = usin->sin6_port;
263 fl6.fl6_sport = inet->inet_sport;
264 fl6.flowi6_uid = sk->sk_uid;
266 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
267 final_p = fl6_update_dst(&fl6, opt, &final);
269 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
271 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
279 sk->sk_v6_rcv_saddr = *saddr;
282 /* set the source address */
284 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
286 sk->sk_gso_type = SKB_GSO_TCPV6;
287 ip6_dst_store(sk, dst, NULL, NULL);
289 icsk->icsk_ext_hdr_len = 0;
291 icsk->icsk_ext_hdr_len = opt->opt_flen +
294 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
296 inet->inet_dport = usin->sin6_port;
298 tcp_set_state(sk, TCP_SYN_SENT);
299 err = inet6_hash_connect(tcp_death_row, sk);
305 if (likely(!tp->repair)) {
307 WRITE_ONCE(tp->write_seq,
308 secure_tcpv6_seq(np->saddr.s6_addr32,
309 sk->sk_v6_daddr.s6_addr32,
312 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
314 sk->sk_v6_daddr.s6_addr32);
317 if (tcp_fastopen_defer_connect(sk, &err))
322 err = tcp_connect(sk);
/* Failure path: tear the half-set-up socket back down. */
329 tcp_set_state(sk, TCP_CLOSE);
331 inet->inet_dport = 0;
332 sk->sk_route_caps = 0;
336 static void tcp_v6_mtu_reduced(struct sock *sk)
338 struct dst_entry *dst;
341 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
344 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
346 /* Drop requests trying to increase our current mss.
347 * Check done in __ip6_rt_update_pmtu() is too late.
349 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
352 dst = inet6_csk_update_pmtu(sk, mtu);
356 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
357 tcp_sync_mss(sk, dst_mtu(dst));
358 tcp_simple_retransmit(sk);
/*
 * tcp_v6_err(): ICMPv6 error handler for TCP -- looks up the affected
 * socket and dispatches on error type (redirect, packet-too-big, fatal).
 *
 * NOTE(review): garbled by extraction -- fused line numbers and missing
 * intermediate lines (socket/tp locals, locking, labels).  Restore from
 * revision history before editing; do not compile as-is.
 */
362 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
363 u8 type, u8 code, int offset, __be32 info)
365 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
366 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
367 struct net *net = dev_net(skb->dev);
368 struct request_sock *fastopen;
369 struct ipv6_pinfo *np;
376 sk = __inet6_lookup_established(net, &tcp_hashinfo,
377 &hdr->daddr, th->dest,
378 &hdr->saddr, ntohs(th->source),
379 skb->dev->ifindex, inet6_sdif(skb));
382 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
387 if (sk->sk_state == TCP_TIME_WAIT) {
388 inet_twsk_put(inet_twsk(sk));
391 seq = ntohl(th->seq);
392 fatal = icmpv6_err_convert(type, code, &err);
393 if (sk->sk_state == TCP_NEW_SYN_RECV)
394 return tcp_req_err(sk, seq, fatal);
397 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
398 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
400 if (sk->sk_state == TCP_CLOSE)
403 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
404 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
409 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
410 fastopen = tp->fastopen_rsk;
411 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
412 if (sk->sk_state != TCP_LISTEN &&
413 !between(seq, snd_una, tp->snd_nxt)) {
414 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
420 if (type == NDISC_REDIRECT) {
421 if (!sock_owned_by_user(sk)) {
422 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
425 dst->ops->redirect(dst, sk, skb);
430 if (type == ICMPV6_PKT_TOOBIG) {
431 u32 mtu = ntohl(info);
433 /* We are not interested in TCP_LISTEN and open_requests
434 * (SYN-ACKs send out by Linux are always <576bytes so
435 * they should go through unfragmented).
437 if (sk->sk_state == TCP_LISTEN)
440 if (!ip6_sk_accept_pmtu(sk))
443 if (mtu < IPV6_MIN_MTU)
446 WRITE_ONCE(tp->mtu_info, mtu);
448 if (!sock_owned_by_user(sk))
449 tcp_v6_mtu_reduced(sk);
450 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
457 /* Might be for an request_sock */
458 switch (sk->sk_state) {
461 /* Only in fast or simultaneous open. If a fast open socket is
462 * is already accepted it is treated as a connected one below.
464 if (fastopen && !fastopen->sk)
467 if (!sock_owned_by_user(sk)) {
469 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
473 sk->sk_err_soft = err;
477 if (!sock_owned_by_user(sk) && np->recverr) {
479 sk->sk_error_report(sk);
481 sk->sk_err_soft = err;
489 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
491 struct request_sock *req,
492 struct tcp_fastopen_cookie *foc,
493 enum tcp_synack_type synack_type)
495 struct inet_request_sock *ireq = inet_rsk(req);
496 struct ipv6_pinfo *np = inet6_sk(sk);
497 struct ipv6_txoptions *opt;
498 struct flowi6 *fl6 = &fl->u.ip6;
502 /* First, grab a route. */
503 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
504 IPPROTO_TCP)) == NULL)
507 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
510 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
511 &ireq->ir_v6_rmt_addr);
513 fl6->daddr = ireq->ir_v6_rmt_addr;
514 if (np->repflow && ireq->pktopts)
515 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
518 opt = ireq->ipv6_opt;
520 opt = rcu_dereference(np->opt);
521 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
524 err = net_xmit_eval(err);
532 static void tcp_v6_reqsk_destructor(struct request_sock *req)
534 kfree(inet_rsk(req)->ipv6_opt);
535 kfree_skb(inet_rsk(req)->pktopts);
538 #ifdef CONFIG_TCP_MD5SIG
539 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
540 const struct in6_addr *addr)
542 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
545 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
546 const struct sock *addr_sk)
548 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
551 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
552 char __user *optval, int optlen)
554 struct tcp_md5sig cmd;
555 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
558 if (optlen < sizeof(cmd))
561 if (copy_from_user(&cmd, optval, sizeof(cmd)))
564 if (sin6->sin6_family != AF_INET6)
567 if (optname == TCP_MD5SIG_EXT &&
568 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
569 prefixlen = cmd.tcpm_prefixlen;
570 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
574 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
577 if (!cmd.tcpm_keylen) {
578 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
579 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
581 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
582 AF_INET6, prefixlen);
585 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
588 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
589 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
590 AF_INET, prefixlen, cmd.tcpm_key,
591 cmd.tcpm_keylen, GFP_KERNEL);
593 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
594 AF_INET6, prefixlen, cmd.tcpm_key,
595 cmd.tcpm_keylen, GFP_KERNEL);
598 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
599 const struct in6_addr *daddr,
600 const struct in6_addr *saddr,
601 const struct tcphdr *th, int nbytes)
603 struct tcp6_pseudohdr *bp;
604 struct scatterlist sg;
608 /* 1. TCP pseudo-header (RFC2460) */
611 bp->protocol = cpu_to_be32(IPPROTO_TCP);
612 bp->len = cpu_to_be32(nbytes);
614 _th = (struct tcphdr *)(bp + 1);
615 memcpy(_th, th, sizeof(*th));
618 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
619 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
620 sizeof(*bp) + sizeof(*th));
621 return crypto_ahash_update(hp->md5_req);
624 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
625 const struct in6_addr *daddr, struct in6_addr *saddr,
626 const struct tcphdr *th)
628 struct tcp_md5sig_pool *hp;
629 struct ahash_request *req;
631 hp = tcp_get_md5sig_pool();
633 goto clear_hash_noput;
636 if (crypto_ahash_init(req))
638 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
640 if (tcp_md5_hash_key(hp, key))
642 ahash_request_set_crypt(req, NULL, md5_hash, 0);
643 if (crypto_ahash_final(req))
646 tcp_put_md5sig_pool();
650 tcp_put_md5sig_pool();
652 memset(md5_hash, 0, 16);
656 static int tcp_v6_md5_hash_skb(char *md5_hash,
657 const struct tcp_md5sig_key *key,
658 const struct sock *sk,
659 const struct sk_buff *skb)
661 const struct in6_addr *saddr, *daddr;
662 struct tcp_md5sig_pool *hp;
663 struct ahash_request *req;
664 const struct tcphdr *th = tcp_hdr(skb);
666 if (sk) { /* valid for establish/request sockets */
667 saddr = &sk->sk_v6_rcv_saddr;
668 daddr = &sk->sk_v6_daddr;
670 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
671 saddr = &ip6h->saddr;
672 daddr = &ip6h->daddr;
675 hp = tcp_get_md5sig_pool();
677 goto clear_hash_noput;
680 if (crypto_ahash_init(req))
683 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
685 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
687 if (tcp_md5_hash_key(hp, key))
689 ahash_request_set_crypt(req, NULL, md5_hash, 0);
690 if (crypto_ahash_final(req))
693 tcp_put_md5sig_pool();
697 tcp_put_md5sig_pool();
699 memset(md5_hash, 0, 16);
705 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
706 const struct sk_buff *skb)
708 #ifdef CONFIG_TCP_MD5SIG
709 const __u8 *hash_location = NULL;
710 struct tcp_md5sig_key *hash_expected;
711 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
712 const struct tcphdr *th = tcp_hdr(skb);
716 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
717 hash_location = tcp_parse_md5sig_option(th);
719 /* We've parsed the options - do we have a hash? */
720 if (!hash_expected && !hash_location)
723 if (hash_expected && !hash_location) {
724 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
728 if (!hash_expected && hash_location) {
729 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
733 /* check the signature */
734 genhash = tcp_v6_md5_hash_skb(newhash,
738 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
739 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
740 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
741 genhash ? "failed" : "mismatch",
742 &ip6h->saddr, ntohs(th->source),
743 &ip6h->daddr, ntohs(th->dest));
750 static void tcp_v6_init_req(struct request_sock *req,
751 const struct sock *sk_listener,
754 struct inet_request_sock *ireq = inet_rsk(req);
755 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
757 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
758 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
760 /* So that link locals have meaning */
761 if (!sk_listener->sk_bound_dev_if &&
762 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
763 ireq->ir_iif = tcp_v6_iif(skb);
765 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
766 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
767 np->rxopt.bits.rxinfo ||
768 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
769 np->rxopt.bits.rxohlim || np->repflow)) {
770 refcount_inc(&skb->users);
775 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
777 const struct request_sock *req)
779 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
782 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
784 .obj_size = sizeof(struct tcp6_request_sock),
785 .rtx_syn_ack = tcp_rtx_synack,
786 .send_ack = tcp_v6_reqsk_send_ack,
787 .destructor = tcp_v6_reqsk_destructor,
788 .send_reset = tcp_v6_send_reset,
789 .syn_ack_timeout = tcp_syn_ack_timeout,
792 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
793 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
794 sizeof(struct ipv6hdr),
795 #ifdef CONFIG_TCP_MD5SIG
796 .req_md5_lookup = tcp_v6_md5_lookup,
797 .calc_md5_hash = tcp_v6_md5_hash_skb,
799 .init_req = tcp_v6_init_req,
800 #ifdef CONFIG_SYN_COOKIES
801 .cookie_init_seq = cookie_v6_init_sequence,
803 .route_req = tcp_v6_route_req,
804 .init_seq = tcp_v6_init_seq,
805 .init_ts_off = tcp_v6_init_ts_off,
806 .send_synack = tcp_v6_send_synack,
/*
 * tcp_v6_send_response(): build and send a bare RST or ACK segment on the
 * per-netns IPv6 control socket (used by tcp_v6_send_reset/tcp_v6_send_ack).
 *
 * NOTE(review): garbled by extraction -- fused line numbers and missing
 * lines (locals such as t1/topt/fl6/mark, branch/brace lines).  Restore
 * the full function from revision history before editing.
 */
809 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
810 u32 ack, u32 win, u32 tsval, u32 tsecr,
811 int oif, struct tcp_md5sig_key *key, int rst,
812 u8 tclass, __be32 label)
814 const struct tcphdr *th = tcp_hdr(skb);
816 struct sk_buff *buff;
818 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
819 struct sock *ctl_sk = net->ipv6.tcp_sk;
820 unsigned int tot_len = sizeof(struct tcphdr);
821 struct dst_entry *dst;
826 tot_len += TCPOLEN_TSTAMP_ALIGNED;
827 #ifdef CONFIG_TCP_MD5SIG
829 tot_len += TCPOLEN_MD5SIG_ALIGNED;
832 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
837 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
839 t1 = skb_push(buff, tot_len);
840 skb_reset_transport_header(buff);
842 /* Swap the send and the receive. */
843 memset(t1, 0, sizeof(*t1));
844 t1->dest = th->source;
845 t1->source = th->dest;
846 t1->doff = tot_len / 4;
847 t1->seq = htonl(seq);
848 t1->ack_seq = htonl(ack);
849 t1->ack = !rst || !th->ack;
851 t1->window = htons(win);
853 topt = (__be32 *)(t1 + 1);
856 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
857 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
858 *topt++ = htonl(tsval);
859 *topt++ = htonl(tsecr);
862 #ifdef CONFIG_TCP_MD5SIG
864 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
865 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
866 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
867 &ipv6_hdr(skb)->saddr,
868 &ipv6_hdr(skb)->daddr, t1);
872 memset(&fl6, 0, sizeof(fl6));
873 fl6.daddr = ipv6_hdr(skb)->saddr;
874 fl6.saddr = ipv6_hdr(skb)->daddr;
875 fl6.flowlabel = label;
877 buff->ip_summed = CHECKSUM_PARTIAL;
880 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
882 fl6.flowi6_proto = IPPROTO_TCP;
883 if (rt6_need_strict(&fl6.daddr) && !oif)
884 fl6.flowi6_oif = tcp_v6_iif(skb);
886 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
889 fl6.flowi6_oif = oif;
893 mark = (sk->sk_state == TCP_TIME_WAIT) ?
894 inet_twsk(sk)->tw_mark : sk->sk_mark;
895 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
896 fl6.fl6_dport = t1->dest;
897 fl6.fl6_sport = t1->source;
898 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
899 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
901 /* Pass a socket to ip6_dst_lookup either it is for RST
902 * Underlying function will use this to retrieve the network
905 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
907 skb_dst_set(buff, dst);
908 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
909 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
911 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/*
 * tcp_v6_send_reset(): emit an RST in reply to @skb, optionally MD5-signed
 * (falling back to a listener lookup when no established socket exists).
 *
 * NOTE(review): garbled by extraction -- fused line numbers and missing
 * lines (th->rst early-return, seq/ack computation pieces, rcu locking,
 * out labels).  Restore from revision history before editing.
 */
918 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
920 const struct tcphdr *th = tcp_hdr(skb);
921 u32 seq = 0, ack_seq = 0;
922 struct tcp_md5sig_key *key = NULL;
923 #ifdef CONFIG_TCP_MD5SIG
924 const __u8 *hash_location = NULL;
925 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
926 unsigned char newhash[16];
928 struct sock *sk1 = NULL;
935 /* If sk not NULL, it means we did a successful lookup and incoming
936 * route had to be correct. prequeue might have dropped our dst.
938 if (!sk && !ipv6_unicast_destination(skb))
941 #ifdef CONFIG_TCP_MD5SIG
943 hash_location = tcp_parse_md5sig_option(th);
944 if (sk && sk_fullsock(sk)) {
945 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
946 } else if (hash_location) {
948 * active side is lost. Try to find listening socket through
949 * source port, and then find md5 key through listening socket.
950 * we are not loose security here:
951 * Incoming packet is checked with md5 hash with finding key,
952 * no RST generated if md5 hash doesn't match.
954 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
955 &tcp_hashinfo, NULL, 0,
957 th->source, &ipv6h->daddr,
959 tcp_v6_iif_l3_slave(skb),
964 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
968 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
969 if (genhash || memcmp(hash_location, newhash, 16) != 0)
975 seq = ntohl(th->ack_seq);
977 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
981 oif = sk->sk_bound_dev_if;
983 trace_tcp_send_reset(sk, skb);
986 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
988 #ifdef CONFIG_TCP_MD5SIG
994 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
995 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
996 struct tcp_md5sig_key *key, u8 tclass,
999 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
1003 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1005 struct inet_timewait_sock *tw = inet_twsk(sk);
1006 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1008 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1009 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1010 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1011 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1012 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
1017 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1018 struct request_sock *req)
1020 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1021 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1024 * The window field (SEG.WND) of every outgoing segment, with the
1025 * exception of <SYN> segments, MUST be right-shifted by
1026 * Rcv.Wind.Shift bits:
1028 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1029 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1030 tcp_rsk(req)->rcv_nxt,
1031 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1032 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1033 req->ts_recent, sk->sk_bound_dev_if,
1034 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
/* If SYN cookies are enabled and this is not a SYN, validate a possible
 * cookie ACK; otherwise pass @sk through unchanged.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}
1050 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1052 if (skb->protocol == htons(ETH_P_IP))
1053 return tcp_v4_conn_request(sk, skb);
1055 if (!ipv6_unicast_destination(skb))
1058 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1059 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1063 return tcp_conn_request(&tcp6_request_sock_ops,
1064 &tcp_request_sock_ipv6_ops, sk, skb);
1068 return 0; /* don't send reset */
1071 static void tcp_v6_restore_cb(struct sk_buff *skb)
1073 /* We need to move header back to the beginning if xfrm6_policy_check()
1074 * and tcp_v6_fill_cb() are going to be called again.
1075 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1077 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1078 sizeof(struct inet6_skb_parm));
/*
 * tcp_v6_syn_recv_sock(): create the child socket when the 3-way
 * handshake completes, handling both the v4-mapped path (delegating to
 * tcp_v4_syn_recv_sock()) and the native IPv6 path.
 *
 * NOTE(review): garbled by extraction -- fused line numbers and missing
 * lines (fl6 local, NULL checks, out/out_overflow labels, braces).
 * Restore the full function from revision history before editing.
 */
1081 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1082 struct request_sock *req,
1083 struct dst_entry *dst,
1084 struct request_sock *req_unhash,
1087 struct inet_request_sock *ireq;
1088 struct ipv6_pinfo *newnp;
1089 const struct ipv6_pinfo *np = inet6_sk(sk);
1090 struct ipv6_txoptions *opt;
1091 struct tcp6_sock *newtcp6sk;
1092 struct inet_sock *newinet;
1093 struct tcp_sock *newtp;
1095 #ifdef CONFIG_TCP_MD5SIG
1096 struct tcp_md5sig_key *key;
/* v4-mapped child: delegate to IPv4 and patch in the IPv6 pinfo. */
1100 if (skb->protocol == htons(ETH_P_IP)) {
1105 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1106 req_unhash, own_req);
1111 newtcp6sk = (struct tcp6_sock *)newsk;
1112 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1114 newinet = inet_sk(newsk);
1115 newnp = inet6_sk(newsk);
1116 newtp = tcp_sk(newsk);
1118 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1120 newnp->saddr = newsk->sk_v6_rcv_saddr;
1122 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1123 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1124 #ifdef CONFIG_TCP_MD5SIG
1125 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1128 newnp->ipv6_mc_list = NULL;
1129 newnp->ipv6_ac_list = NULL;
1130 newnp->ipv6_fl_list = NULL;
1131 newnp->pktoptions = NULL;
1133 newnp->mcast_oif = inet_iif(skb);
1134 newnp->mcast_hops = ip_hdr(skb)->ttl;
1135 newnp->rcv_flowinfo = 0;
1137 newnp->flow_label = 0;
1140 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1141 * here, tcp_create_openreq_child now does this for us, see the comment in
1142 * that function for the gory details. -acme
1145 /* It is tricky place. Until this moment IPv4 tcp
1146 worked with IPv6 icsk.icsk_af_ops.
1149 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* Native IPv6 path. */
1154 ireq = inet_rsk(req);
1156 if (sk_acceptq_is_full(sk))
1160 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1165 newsk = tcp_create_openreq_child(sk, req, skb);
1170 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1171 * count here, tcp_create_openreq_child now does this for us, see the
1172 * comment in that function for the gory details. -acme
1175 newsk->sk_gso_type = SKB_GSO_TCPV6;
1176 ip6_dst_store(newsk, dst, NULL, NULL);
1177 inet6_sk_rx_dst_set(newsk, skb);
1179 newtcp6sk = (struct tcp6_sock *)newsk;
1180 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1182 newtp = tcp_sk(newsk);
1183 newinet = inet_sk(newsk);
1184 newnp = inet6_sk(newsk);
1186 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1188 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1189 newnp->saddr = ireq->ir_v6_loc_addr;
1190 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1191 newsk->sk_bound_dev_if = ireq->ir_iif;
1193 /* Now IPv6 options...
1195 First: no IPv4 options.
1197 newinet->inet_opt = NULL;
1198 newnp->ipv6_mc_list = NULL;
1199 newnp->ipv6_ac_list = NULL;
1200 newnp->ipv6_fl_list = NULL;
1203 newnp->rxopt.all = np->rxopt.all;
1205 newnp->pktoptions = NULL;
1207 newnp->mcast_oif = tcp_v6_iif(skb);
1208 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1209 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1211 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1213 /* Clone native IPv6 options from listening socket (if any)
1215 Yes, keeping reference count would be much more clever,
1216 but we make one more one thing there: reattach optmem
1219 opt = ireq->ipv6_opt;
1221 opt = rcu_dereference(np->opt);
1223 opt = ipv6_dup_options(newsk, opt);
1224 RCU_INIT_POINTER(newnp->opt, opt);
1226 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1228 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1231 tcp_ca_openreq_child(newsk, dst);
1233 tcp_sync_mss(newsk, dst_mtu(dst));
1234 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1236 tcp_initialize_rcv_mss(newsk);
1238 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1239 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1241 #ifdef CONFIG_TCP_MD5SIG
1242 /* Copy over the MD5 key from the original socket */
1243 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1245 /* We're using one, so create a matching key
1246 * on the newsk structure. If we fail to get
1247 * memory, then we end up not copying the key
1250 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1251 AF_INET6, 128, key->key, key->keylen,
1252 sk_gfp_mask(sk, GFP_ATOMIC));
1256 if (__inet_inherit_port(sk, newsk) < 0) {
1257 inet_csk_prepare_forced_close(newsk);
1261 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1263 tcp_move_syn(newtp, req);
1265 /* Clone pktoptions received with SYN, if we own the req */
1266 if (ireq->pktopts) {
1267 newnp->pktoptions = skb_clone(ireq->pktopts,
1268 sk_gfp_mask(sk, GFP_ATOMIC));
1269 consume_skb(ireq->pktopts);
1270 ireq->pktopts = NULL;
1271 if (newnp->pktoptions) {
1272 tcp_v6_restore_cb(newnp->pktoptions);
1273 skb_set_owner_r(newnp->pktoptions, newsk);
1281 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 */
/*
 * tcp_v6_do_rcv(): per-socket receive path for IPv6 TCP, including the
 * Stevens IPV6_PKTOPTIONS latching of the last received segment.
 *
 * NOTE(review): garbled by extraction -- fused line numbers and missing
 * lines (tp assignment, labels reset/discard/csum_err, braces).  Restore
 * the full function from revision history before editing.
 */
1297 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1299 struct ipv6_pinfo *np = inet6_sk(sk);
1300 struct tcp_sock *tp;
1301 struct sk_buff *opt_skb = NULL;
1303 /* Imagine: socket is IPv6. IPv4 packet arrives,
1304 goes to IPv4 receive handler and backlogged.
1305 From backlog it always goes here. Kerboom...
1306 Fortunately, tcp_rcv_established and rcv_established
1307 handle them correctly, but it is not case with
1308 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1311 if (skb->protocol == htons(ETH_P_IP))
1312 return tcp_v4_do_rcv(sk, skb);
1315 * socket locking is here for SMP purposes as backlog rcv
1316 * is currently called with bh processing disabled.
1319 /* Do Stevens' IPV6_PKTOPTIONS.
1321 Yes, guys, it is the only place in our code, where we
1322 may make it not affecting IPv4.
1323 The rest of code is protocol independent,
1324 and I do not like idea to uglify IPv4.
1326 Actually, all the idea behind IPV6_PKTOPTIONS
1327 looks not very well thought. For now we latch
1328 options, received in the last packet, enqueued
1329 by tcp. Feel free to propose better solution.
1333 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1335 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1336 struct dst_entry *dst = sk->sk_rx_dst;
1338 sock_rps_save_rxhash(sk, skb);
1339 sk_mark_napi_id(sk, skb);
1341 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1342 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1344 sk->sk_rx_dst = NULL;
1348 tcp_rcv_established(sk, skb);
1350 goto ipv6_pktoptions;
1354 if (tcp_checksum_complete(skb))
1357 if (sk->sk_state == TCP_LISTEN) {
1358 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1364 if (tcp_child_process(sk, nsk, skb))
1367 __kfree_skb(opt_skb);
1371 sock_rps_save_rxhash(sk, skb);
1373 if (tcp_rcv_state_process(sk, skb))
1376 goto ipv6_pktoptions;
1380 tcp_v6_send_reset(sk, skb);
1383 __kfree_skb(opt_skb);
1387 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1388 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1393 /* Do you ask, what is it?
1395 1. skb was enqueued by tcp.
1396 2. skb is added to tail of read queue, rather than out of order.
1397 3. socket is not in passive state.
1398 4. Finally, it really contains options, which user wants to receive.
1401 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1402 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1403 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1404 np->mcast_oif = tcp_v6_iif(opt_skb);
1405 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1406 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1407 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1408 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1410 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1411 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1412 skb_set_owner_r(opt_skb, sk);
1413 tcp_v6_restore_cb(opt_skb);
1414 opt_skb = xchg(&np->pktoptions, opt_skb);
1416 __kfree_skb(opt_skb);
1417 opt_skb = xchg(&np->pktoptions, NULL);
/* Populate TCP_SKB_CB() from the IPv6 and TCP headers before the skb
 * enters the TCP state machine.  Must run after xfrm6_policy_check()
 * since IP6CB() is overwritten in place (see comment below).
 */
1425 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1426 const struct tcphdr *th)
1428 /* This is tricky: we move IP6CB at its correct location into
1429 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1430 * _decode_session6() uses IP6CB().
1431 * barrier() makes sure compiler won't play aliasing games.
1433 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1434 sizeof(struct inet6_skb_parm));
/* end_seq counts SYN and FIN as one unit of sequence space each. */
1437 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1438 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1439 skb->len - th->doff*4);
1440 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1441 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1442 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1443 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1444 TCP_SKB_CB(skb)->sacked = 0;
/* Either a software or a hardware rx timestamp marks the skb. */
1445 TCP_SKB_CB(skb)->has_rxtstamp =
1446 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
/* Protocol handler entry point for every inbound IPv6 TCP segment.
 * Validates the header and checksum, looks up the owning socket, and
 * dispatches to the request-sock, listener, established, or TIME_WAIT
 * handling paths.  (Several original lines, including the error and
 * "no_tcp_socket"/"discard_it" labels, are not visible in this chunk.)
 */
1449 static int tcp_v6_rcv(struct sk_buff *skb)
1451 int sdif = inet6_sdif(skb);
1452 const struct tcphdr *th;
1453 const struct ipv6hdr *hdr;
1457 struct net *net = dev_net(skb->dev);
/* Only packets addressed to this host are processed. */
1459 if (skb->pkt_type != PACKET_HOST)
1463 * Count it even if it's bad.
1465 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
/* Make sure the basic TCP header, then the full header including
 * options (doff * 4 bytes), is in linear skb memory.
 */
1467 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1470 th = (const struct tcphdr *)skb->data;
1472 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1474 if (!pskb_may_pull(skb, th->doff*4))
1477 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
/* Re-read header pointers: pskb_may_pull() may have moved skb->data. */
1480 th = (const struct tcphdr *)skb->data;
1481 hdr = ipv6_hdr(skb);
1484 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1485 th->source, th->dest, inet6_iif(skb), sdif,
1491 if (sk->sk_state == TCP_TIME_WAIT)
/* Segment matched a request socket (pending 3WHS). */
1494 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1495 struct request_sock *req = inet_reqsk(sk);
1496 bool req_stolen = false;
1499 sk = req->rsk_listener;
1500 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1501 sk_drops_add(sk, skb);
1505 if (tcp_checksum_complete(skb)) {
/* Listener went away (e.g. closed) while the request was queued. */
1509 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1510 inet_csk_reqsk_queue_drop_and_put(sk, req);
1516 if (!tcp_filter(sk, skb)) {
1517 th = (const struct tcphdr *)skb->data;
1518 hdr = ipv6_hdr(skb);
1519 tcp_v6_fill_cb(skb, hdr, th);
1520 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1525 /* Another cpu got exclusive access to req
1526 * and created a full blown socket.
1527 * Try to feed this packet to this socket
1528 * instead of discarding it.
1530 tcp_v6_restore_cb(skb);
1534 goto discard_and_relse;
1538 tcp_v6_restore_cb(skb);
1539 } else if (tcp_child_process(sk, nsk, skb)) {
1540 tcp_v6_send_reset(nsk, skb);
1541 goto discard_and_relse;
/* Generalized TTL security (RFC 5082 style): drop packets whose
 * hop limit is below the socket's configured minimum.
 */
1547 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1548 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1549 goto discard_and_relse;
1552 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1553 goto discard_and_relse;
1555 if (tcp_v6_inbound_md5_hash(sk, skb))
1556 goto discard_and_relse;
/* tcp_filter() (socket BPF) may trim the skb; re-read headers. */
1558 if (tcp_filter(sk, skb))
1559 goto discard_and_relse;
1560 th = (const struct tcphdr *)skb->data;
1561 hdr = ipv6_hdr(skb);
1562 tcp_v6_fill_cb(skb, hdr, th);
/* Listeners are processed without the socket lock held here. */
1566 if (sk->sk_state == TCP_LISTEN) {
1567 ret = tcp_v6_do_rcv(sk, skb);
1568 goto put_and_return;
1571 sk_incoming_cpu_update(sk);
1573 bh_lock_sock_nested(sk);
1574 tcp_segs_in(tcp_sk(sk), skb);
/* Process directly if the socket is not owned by user context,
 * otherwise queue on the backlog (dropping if the backlog is full).
 */
1576 if (!sock_owned_by_user(sk)) {
1577 ret = tcp_v6_do_rcv(sk, skb);
1578 } else if (tcp_add_backlog(sk, skb)) {
1579 goto discard_and_relse;
1586 return ret ? -1 : 0;
/* No matching socket: policy-check then reset/discard. */
1589 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1592 tcp_v6_fill_cb(skb, hdr, th);
1594 if (tcp_checksum_complete(skb)) {
1596 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1598 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1600 tcp_v6_send_reset(NULL, skb);
1608 sk_drops_add(sk, skb);
/* TIME_WAIT handling: validate, then consult the timewait state
 * machine for what to do with this segment.
 */
1614 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1615 inet_twsk_put(inet_twsk(sk));
1619 tcp_v6_fill_cb(skb, hdr, th);
1621 if (tcp_checksum_complete(skb)) {
1622 inet_twsk_put(inet_twsk(sk));
1626 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* A SYN for a new connection: find a current listener and, if one
 * exists, retire the timewait socket and restart processing.
 */
1631 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1632 skb, __tcp_hdrlen(th),
1633 &ipv6_hdr(skb)->saddr, th->source,
1634 &ipv6_hdr(skb)->daddr,
1636 tcp_v6_iif_l3_slave(skb),
1639 struct inet_timewait_sock *tw = inet_twsk(sk);
1640 inet_twsk_deschedule_put(tw);
1642 tcp_v6_restore_cb(skb);
1650 tcp_v6_timewait_ack(sk, skb);
1653 tcp_v6_send_reset(sk, skb);
1654 inet_twsk_deschedule_put(inet_twsk(sk));
1656 case TCP_TW_SUCCESS:
/* Early demultiplex: before routing, try to find an established socket
 * for this segment and attach its cached rx dst to the skb, avoiding a
 * full route lookup on the hot path.
 */
1662 static void tcp_v6_early_demux(struct sk_buff *skb)
1664 const struct ipv6hdr *hdr;
1665 const struct tcphdr *th;
1668 if (skb->pkt_type != PACKET_HOST)
1671 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1674 hdr = ipv6_hdr(skb);
1677 if (th->doff < sizeof(struct tcphdr) / 4)
1680 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1681 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1682 &hdr->saddr, th->source,
1683 &hdr->daddr, ntohs(th->dest),
1684 inet6_iif(skb), inet6_sdif(skb));
/* Transfer the socket reference to the skb destructor; only full
 * sockets carry a usable cached dst.
 */
1687 skb->destructor = sock_edemux;
1688 if (sk_fullsock(sk)) {
1689 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
/* Use the cached dst only if its cookie validates and the packet
 * arrived on the interface the cache was established for.
 */
1692 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1694 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1695 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket ops for TCPv6 (size, uniqueness check, destructor). */
1700 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1701 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1702 .twsk_unique = tcp_twsk_unique,
1703 .twsk_destructor = tcp_twsk_destructor,
/* AF-specific connection ops for native IPv6 TCP sockets. */
1706 static const struct inet_connection_sock_af_ops ipv6_specific = {
1707 .queue_xmit = inet6_csk_xmit,
1708 .send_check = tcp_v6_send_check,
1709 .rebuild_header = inet6_sk_rebuild_header,
1710 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1711 .conn_request = tcp_v6_conn_request,
1712 .syn_recv_sock = tcp_v6_syn_recv_sock,
1713 .net_header_len = sizeof(struct ipv6hdr),
1714 .net_frag_header_len = sizeof(struct frag_hdr),
1715 .setsockopt = ipv6_setsockopt,
1716 .getsockopt = ipv6_getsockopt,
1717 .addr2sockaddr = inet6_csk_addr2sockaddr,
1718 .sockaddr_len = sizeof(struct sockaddr_in6),
1719 #ifdef CONFIG_COMPAT
1720 .compat_setsockopt = compat_ipv6_setsockopt,
1721 .compat_getsockopt = compat_ipv6_getsockopt,
1723 .mtu_reduced = tcp_v6_mtu_reduced,
1726 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) callbacks for native IPv6 sockets. */
1727 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1728 .md5_lookup = tcp_v6_md5_lookup,
1729 .calc_md5_hash = tcp_v6_md5_hash_skb,
1730 .md5_parse = tcp_v6_parse_md5_keys,
1735 * TCP over IPv4 via INET6 API
/* AF-specific connection ops for v4-mapped sockets: IPv4 transmit paths
 * (ip_queue_xmit, tcp_v4_send_check) behind the IPv6 socket API.
 */
1737 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1738 .queue_xmit = ip_queue_xmit,
1739 .send_check = tcp_v4_send_check,
1740 .rebuild_header = inet_sk_rebuild_header,
1741 .sk_rx_dst_set = inet_sk_rx_dst_set,
1742 .conn_request = tcp_v6_conn_request,
1743 .syn_recv_sock = tcp_v6_syn_recv_sock,
1744 .net_header_len = sizeof(struct iphdr),
1745 .setsockopt = ipv6_setsockopt,
1746 .getsockopt = ipv6_getsockopt,
1747 .addr2sockaddr = inet6_csk_addr2sockaddr,
1748 .sockaddr_len = sizeof(struct sockaddr_in6),
1749 #ifdef CONFIG_COMPAT
1750 .compat_setsockopt = compat_ipv6_setsockopt,
1751 .compat_getsockopt = compat_ipv6_getsockopt,
1753 .mtu_reduced = tcp_v4_mtu_reduced,
1756 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 callbacks for v4-mapped sockets (IPv4 hashing, IPv6 parsing). */
1757 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1758 .md5_lookup = tcp_v4_md5_lookup,
1759 .calc_md5_hash = tcp_v4_md5_hash_skb,
1760 .md5_parse = tcp_v6_parse_md5_keys,
1764 /* NOTE: A lot of things set to zero explicitly by call to
1765 * sk_alloc() so need not be done here.
/* Per-socket init for IPv6 TCP: install the IPv6 AF ops (and MD5 ops
 * when configured) on the newly created socket.
 */
1767 static int tcp_v6_init_sock(struct sock *sk)
1769 struct inet_connection_sock *icsk = inet_csk(sk);
1773 icsk->icsk_af_ops = &ipv6_specific;
1775 #ifdef CONFIG_TCP_MD5SIG
1776 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Tear down the shared IPv4 TCP state, then the IPv6-specific state. */
1782 static void tcp_v6_destroy_sock(struct sock *sk)
1784 tcp_v4_destroy_sock(sk);
1785 inet6_destroy_sock(sk);
1788 #ifdef CONFIG_PROC_FS
1789 /* Proc filesystem TCPv6 sock list dumping. */
/* Format one SYN_RECV request socket as a /proc/net/tcp6 row.
 * @i is the row index printed in the first column.
 */
1790 static void get_openreq6(struct seq_file *seq,
1791 const struct request_sock *req, int i)
/* Remaining time until the SYN-ACK retransmit timer fires. */
1793 long ttd = req->rsk_timer.expires - jiffies;
1794 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1795 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1801 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1802 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1804 src->s6_addr32[0], src->s6_addr32[1],
1805 src->s6_addr32[2], src->s6_addr32[3],
1806 inet_rsk(req)->ir_num,
1807 dest->s6_addr32[0], dest->s6_addr32[1],
1808 dest->s6_addr32[2], dest->s6_addr32[3],
1809 ntohs(inet_rsk(req)->ir_rmt_port),
1811 0, 0, /* could print option size, but that is af dependent. */
1812 1, /* timers active (only the expire timer) */
1813 jiffies_to_clock_t(ttd),
/* UID of the listener that owns this request. */
1815 from_kuid_munged(seq_user_ns(seq),
1816 sock_i_uid(req->rsk_listener)),
1817 0, /* non standard timer */
1818 0, /* open_requests have no inode */
/* Format one full TCP socket as a /proc/net/tcp6 row, including timer
 * state, queue sizes, congestion info and fastopen/slow-start data.
 */
1822 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1824 const struct in6_addr *dest, *src;
1827 unsigned long timer_expires;
1828 const struct inet_sock *inet = inet_sk(sp);
1829 const struct tcp_sock *tp = tcp_sk(sp);
1830 const struct inet_connection_sock *icsk = inet_csk(sp);
1831 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1835 dest = &sp->sk_v6_daddr;
1836 src = &sp->sk_v6_rcv_saddr;
1837 destp = ntohs(inet->inet_dport);
1838 srcp = ntohs(inet->inet_sport);
/* Pick which pending timer to report: retransmit-class timers and the
 * zero-window probe share icsk_timeout; keepalive uses sk_timer.
 */
1840 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1841 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1842 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1844 timer_expires = icsk->icsk_timeout;
1845 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1847 timer_expires = icsk->icsk_timeout;
1848 } else if (timer_pending(&sp->sk_timer)) {
1850 timer_expires = sp->sk_timer.expires;
1853 timer_expires = jiffies;
1856 state = inet_sk_state_load(sp);
/* For listeners the rx queue is the accept backlog, not unread data. */
1857 if (state == TCP_LISTEN)
1858 rx_queue = sp->sk_ack_backlog;
1860 /* Because we don't lock the socket,
1861 * we might find a transient negative value.
1863 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
1864 READ_ONCE(tp->copied_seq), 0);
1867 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1868 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1870 src->s6_addr32[0], src->s6_addr32[1],
1871 src->s6_addr32[2], src->s6_addr32[3], srcp,
1872 dest->s6_addr32[0], dest->s6_addr32[1],
1873 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* tx queue length: bytes written but not yet acknowledged. */
1875 READ_ONCE(tp->write_seq) - tp->snd_una,
1878 jiffies_delta_to_clock_t(timer_expires - jiffies),
1879 icsk->icsk_retransmits,
1880 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1881 icsk->icsk_probes_out,
1883 refcount_read(&sp->sk_refcnt), sp,
1884 jiffies_to_clock_t(icsk->icsk_rto),
1885 jiffies_to_clock_t(icsk->icsk_ack.ato),
1886 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* Last column: fastopen max_qlen for listeners, else ssthresh
 * (-1 while still in initial slow start).
 */
1888 state == TCP_LISTEN ?
1889 fastopenq->max_qlen :
1890 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* Format one TIME_WAIT socket as a /proc/net/tcp6 row; most columns are
 * fixed zeros since a timewait sock carries no queue/uid/inode state.
 */
1894 static void get_timewait6_sock(struct seq_file *seq,
1895 struct inet_timewait_sock *tw, int i)
/* Remaining lifetime of the timewait timer. */
1897 long delta = tw->tw_timer.expires - jiffies;
1898 const struct in6_addr *dest, *src;
1901 dest = &tw->tw_v6_daddr;
1902 src = &tw->tw_v6_rcv_saddr;
1903 destp = ntohs(tw->tw_dport);
1904 srcp = ntohs(tw->tw_sport);
1907 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1908 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1910 src->s6_addr32[0], src->s6_addr32[1],
1911 src->s6_addr32[2], src->s6_addr32[3], srcp,
1912 dest->s6_addr32[0], dest->s6_addr32[1],
1913 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1914 tw->tw_substate, 0, 0,
1915 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1916 refcount_read(&tw->tw_refcnt), tw);
/* seq_file show callback for /proc/net/tcp6: print the header row for
 * the start token, otherwise dispatch on socket state to the matching
 * per-type formatter.
 */
1919 static int tcp6_seq_show(struct seq_file *seq, void *v)
1921 struct tcp_iter_state *st;
1922 struct sock *sk = v;
1924 if (v == SEQ_START_TOKEN) {
1929 "st tx_queue rx_queue tr tm->when retrnsmt"
1930 " uid timeout inode\n");
1935 if (sk->sk_state == TCP_TIME_WAIT)
1936 get_timewait6_sock(seq, v, st->num);
1937 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1938 get_openreq6(seq, v, st->num);
1940 get_tcp6_sock(seq, v, st->num);
/* seq_file iteration ops for /proc/net/tcp6 (shared tcp_seq_* walkers). */
1945 static const struct seq_operations tcp6_seq_ops = {
1946 .show = tcp6_seq_show,
1947 .start = tcp_seq_start,
1948 .next = tcp_seq_next,
1949 .stop = tcp_seq_stop,
/* Per-AF data attached to the tcp6 proc entry by tcp6_proc_init(). */
1952 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
/* Register the per-netns /proc/net/tcp6 entry. */
1956 int __net_init tcp6_proc_init(struct net *net)
1958 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1959 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
/* Remove the per-netns /proc/net/tcp6 entry. */
1964 void tcp6_proc_exit(struct net *net)
1966 remove_proc_entry("tcp6", net->proc_net);
/* struct proto for TCP over IPv6: wires the IPv6-specific entry points
 * (connect, init, destroy, backlog_rcv) to the shared TCP machinery.
 */
1970 struct proto tcpv6_prot = {
1972 .owner = THIS_MODULE,
1974 .pre_connect = tcp_v6_pre_connect,
1975 .connect = tcp_v6_connect,
1976 .disconnect = tcp_disconnect,
1977 .accept = inet_csk_accept,
1979 .init = tcp_v6_init_sock,
1980 .destroy = tcp_v6_destroy_sock,
1981 .shutdown = tcp_shutdown,
1982 .setsockopt = tcp_setsockopt,
1983 .getsockopt = tcp_getsockopt,
1984 .keepalive = tcp_set_keepalive,
1985 .recvmsg = tcp_recvmsg,
1986 .sendmsg = tcp_sendmsg,
1987 .sendpage = tcp_sendpage,
1988 .backlog_rcv = tcp_v6_do_rcv,
1989 .release_cb = tcp_release_cb,
1991 .unhash = inet_unhash,
1992 .get_port = inet_csk_get_port,
1993 .enter_memory_pressure = tcp_enter_memory_pressure,
1994 .leave_memory_pressure = tcp_leave_memory_pressure,
1995 .stream_memory_free = tcp_stream_memory_free,
1996 .sockets_allocated = &tcp_sockets_allocated,
1997 .memory_allocated = &tcp_memory_allocated,
1998 .memory_pressure = &tcp_memory_pressure,
1999 .orphan_count = &tcp_orphan_count,
2000 .sysctl_mem = sysctl_tcp_mem,
2001 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
2002 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
2003 .max_header = MAX_TCP_HEADER,
2004 .obj_size = sizeof(struct tcp6_sock),
2005 .slab_flags = SLAB_TYPESAFE_BY_RCU,
2006 .twsk_prot = &tcp6_timewait_sock_ops,
2007 .rsk_prot = &tcp6_request_sock_ops,
2008 .h.hashinfo = &tcp_hashinfo,
2009 .no_autobind = true,
2010 #ifdef CONFIG_COMPAT
2011 .compat_setsockopt = compat_tcp_setsockopt,
2012 .compat_getsockopt = compat_tcp_getsockopt,
2014 .diag_destroy = tcp_abort,
2017 /* thinking of making this const? Don't.
2018 * early_demux can change based on sysctl.
/* L4 protocol hooks registered for IPPROTO_TCP on IPv6; non-const on
 * purpose (see preceding comment: early_demux is sysctl-switchable).
 */
2020 static struct inet6_protocol tcpv6_protocol = {
2021 .early_demux = tcp_v6_early_demux,
2022 .early_demux_handler = tcp_v6_early_demux,
2023 .handler = tcp_v6_rcv,
2024 .err_handler = tcp_v6_err,
2025 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-switch entry mapping SOCK_STREAM/IPPROTO_TCP to tcpv6_prot. */
2028 static struct inet_protosw tcpv6_protosw = {
2029 .type = SOCK_STREAM,
2030 .protocol = IPPROTO_TCP,
2031 .prot = &tcpv6_prot,
2032 .ops = &inet6_stream_ops,
2033 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the control socket used e.g. for sending
 * resets on behalf of this namespace.
 */
2038 static int __net_init tcpv6_net_init(struct net *net)
2039 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2040 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: destroy the namespace's TCP control socket. */
2043 static void __net_exit tcpv6_net_exit(struct net *net)
2045 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* Batched netns teardown: purge all IPv6 timewait sockets at once. */
2048 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2050 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
/* Per-network-namespace lifecycle callbacks for TCPv6. */
2053 static struct pernet_operations tcpv6_net_ops = {
2054 .init = tcpv6_net_init,
2055 .exit = tcpv6_net_exit,
2056 .exit_batch = tcpv6_net_exit_batch,
/* Module/boot-time init: register the protocol handler, the protosw
 * entry, and the pernet ops, unwinding in reverse order on failure.
 */
2059 int __init tcpv6_init(void)
2063 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2067 /* register inet6 protocol */
2068 ret = inet6_register_protosw(&tcpv6_protosw);
2070 goto out_tcpv6_protocol;
2072 ret = register_pernet_subsys(&tcpv6_net_ops);
2074 goto out_tcpv6_protosw;
/* Error unwind labels: undo registrations in reverse order. */
2079 inet6_unregister_protosw(&tcpv6_protosw);
2081 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* Module exit: unregister everything tcpv6_init() set up, in reverse. */
2085 void tcpv6_exit(void)
2087 unregister_pernet_subsys(&tcpv6_net_ops);
2088 inet6_unregister_protosw(&tcpv6_protosw);
2089 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);