/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>
static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif
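
/* Cache the route that carried an inbound skb on the socket, together with
 * the incoming ifindex and a validity cookie, so the established fast path
 * can skip a full route lookup.
 */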
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		rcu_assign_pointer(sk->sk_rx_dst, dst);
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}
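
/* Initial sequence numbers and timestamp offsets are derived from the
 * connection 4-tuple through keyed hashes (secure_tcpv6_seq() and
 * secure_tcpv6_ts_off()), making them hard to guess for an off-path
 * attacker.
 */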
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}
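
/* Active open. Validates the destination address, handles flow labels,
 * falls back to tcp_v4_connect() for v4-mapped destinations, routes the
 * flow, picks a source address and port, and finally sends the SYN.
 */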
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (!flowlabel)
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */
	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
							 sk->sk_v6_daddr.s6_addr32,
							 inet->inet_sport,
							 inet->inet_dport);
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
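
/* Invoked on ICMPV6_PKT_TOOBIG, either directly from tcp_v6_err() or
 * deferred via TCP_MTU_REDUCED_DEFERRED when the socket was owned by user
 * context: lower the path MTU estimate and retransmit if the MSS shrank.
 */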
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	u32 mtu;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);

	/* Drop requests trying to increase our current mss.
	 * Check done in __ip6_rt_update_pmtu() is too late.
	 */
	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
		return;

	dst = inet6_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}
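
/* ICMPv6 error handler. Maps the quoted TCP segment back to its socket
 * and handles redirects, path-MTU updates and hard errors; work that
 * cannot run while the socket is locked by a user is deferred.
 */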
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));
	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_err(sk, seq, fatal);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always <576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		if (mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
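
/* Send a SYN-ACK for a (possibly Fast Open) request: grab a route if the
 * caller did not supply one, checksum the segment and transmit it with
 * ip6_xmit() using the listener's IPv6 options.
 */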
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
			       np->tclass);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}
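
/* RFC 2385 TCP-MD5: the digest covers a pseudo-header (source and
 * destination address, segment length, protocol), the TCP header with its
 * checksum field zeroed, the payload, and finally the key itself.
 * struct tcp6_pseudohdr mirrors the RFC 2460 checksum pseudo-header.
 */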
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
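
/* Verify the TCP-MD5 option of an inbound segment against the key
 * configured for the peer address. Both a missing signature when one is
 * expected and an unexpected signature cause the segment to be dropped.
 */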
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if (!sk_listener->sk_bound_dev_if &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};
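
/* Build a bare RST or ACK and send it from the per-netns control socket.
 * Used where no full socket exists to transmit from, e.g. resets and
 * answers for request and TIME_WAIT sockets.
 */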
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not losing security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source),
					    tcp_v6_iif_l3_slave(skb),
					    tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	oif = sk ? sk->sk_bound_dev_if : 0;
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0);
}
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
		return 0;
	}

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
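
/* Create the child socket once the handshake (or Fast Open) completes.
 * The v4-mapped branch delegates to tcp_v4_syn_recv_sock() and re-points
 * the child at the mapped ops; the native branch routes the flow, clones
 * the listener's IPv6 options and copies any matching MD5 key before
 * hashing the child socket.
 */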
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);
		if (!newsk)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb, tcp_hdr(skb));
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
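
/* Main receive entry point. Validates and checksums the segment, fills
 * the TCP control block, looks up the owning socket (request, TIME_WAIT
 * or full) and processes it directly or via the socket backlog, answering
 * with RST/ACK where required.
 */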
static int tcp_v6_rcv(struct sk_buff *skb)
{
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad. */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr)/4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false);
		}
		if (!nsk) {
			reqsk_put(req);
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else if (tcp_add_backlog(sk, skb)) {
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* Fall through to ACK */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
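
/* Early demux on the receive fast path: look up the established socket
 * before routing so its cached rx dst can be attached to the skb.
 */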
void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   tp->write_seq - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}
static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};
static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}