/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (!flowlabel)
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
							 sk->sk_v6_daddr.s6_addr32,
							 inet->inet_sport,
							 inet->inet_dport);
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

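/* PMTU discovery: called, possibly deferred from the ICMPv6 handler
 * below, once the socket lock is available; shrinks the cached MSS
 * and retransmits whatever no longer fits the new path MTU.
 */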
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

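/* ICMPv6 error handler. Looks up the socket the errored packet refers
 * to and reacts per message type: NDISC_REDIRECT refreshes the cached
 * route, ICMPV6_PKT_TOOBIG feeds PMTU discovery (deferred via
 * TCP_MTU_REDUCED_DEFERRED when the socket is owned by user context),
 * and fatal errors tear down connections in the SYN states.
 */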
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq, fatal);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

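/* Build and transmit a SYN-ACK for a (possibly Fast Open) request
 * socket. The route may be passed in by the caller; otherwise it is
 * looked up here.
 */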
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

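/* TCP MD5 signature (RFC 2385) support: key lookup, the
 * TCP_MD5SIG[_EXT] setsockopt parser, and helpers that hash the IPv6
 * pseudo-header, TCP header and payload through the per-CPU MD5 pool.
 */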
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}

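/* Hash the RFC 2385 input prefix: the IPv6 pseudo-header (saddr,
 * daddr, length, protocol; see struct tcp6_pseudohdr) followed by the
 * TCP header with its checksum field zeroed. Callers then mix in
 * payload data and/or the key.
 */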
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

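/* Validate the MD5 option on an incoming segment against the key (if
 * any) configured for the peer address. Returns true when the segment
 * must be dropped: an option without a key, a key without an option,
 * or a signature mismatch.
 */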
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if (!sk_listener->sk_bound_dev_if &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

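/* Common backend for RSTs and stateless ACKs. The reply is built on a
 * freshly allocated skb and sent through the per-netns control socket,
 * so it works even when no full socket exists for the flow.
 */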
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even if it is for an RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

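/* Send an RST in response to a bad segment. The RST's sequence numbers
 * are derived from the offending segment, and no RST is ever sent in
 * response to another RST.
 */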
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	int oif;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and the incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked with the md5 hash of the
		 * found key; no RST is generated if the md5 hash doesn't
		 * match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source),
					   tcp_v6_iif_l3_slave(skb),
					   tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	oif = sk ? sk->sk_bound_dev_if : 0;
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 *
	 * RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

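/* Create the child socket for a completed handshake. For IPv4-mapped
 * peers the work is delegated to tcp_v4_syn_recv_sock() and the child
 * is patched up with the mapped af_ops; for native IPv6 the child
 * inherits txoptions, flow label and pktoptions from the request.
 */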
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb, tcp_hdr(skb));
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

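/* Main receive path, called from the IPv6 protocol handler. Validates
 * the header and checksum, looks the segment up in the established,
 * request and listener tables, and either processes it directly or
 * queues it to the socket backlog when the owner holds the lock.
 */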
static int tcp_v6_rcv(struct sk_buff *skb)
{
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false);
		}
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* Fall through to ACK */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

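/* Early demux: on the fast path, find the established socket for an
 * incoming segment before routing so its cached dst can be reused.
 */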
void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   tp->write_seq - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}