3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
/*
 * Forward declarations for the handlers and per-address-family operation
 * tables that the code below refers to.
 *
 * The tcp_sock_af_ops tables are only declared when CONFIG_TCP_MD5SIG is
 * set.
 *
 * NOTE(review): the body of the tcp_v6_md5_do_lookup() definition that
 * follows the MD5 declarations is not visible here; presumably it is the
 * stub used when CONFIG_TCP_MD5SIG is disabled - confirm.
 */
72 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 const struct in6_addr *addr)
/*
 * Record receive-route information taken from @skb on socket @sk.
 *
 * Nothing is recorded unless the skb carries a dst entry and
 * dst_hold_safe() succeeds on it.
 */
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 struct dst_entry *dst = skb_dst(skb);
95 if (dst && dst_hold_safe(dst)) {
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
/* Remember the arrival interface and the route cookie of this dst. */
99 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/*
 * Compute an initial sequence number for answering @skb.
 *
 * The value comes from secure_tcpv6_sequence_number(), fed with the
 * destination and source addresses of the IPv6 header and port
 * information from the TCP header of @skb.
 */
104 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
106 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32,
109 tcp_hdr(skb)->source);
/*
 * Start an active TCP connection from @sk to the IPv6 address in @uaddr.
 *
 * Visible steps, in order:
 *  - check addr_len against SIN6_LEN_RFC2133 and require AF_INET6
 *    (a wrong family yields -EAFNOSUPPORT);
 *  - take the flow label from sin6_flowinfo and, when a label is set,
 *    look it up with fl6_sock_lookup();
 *  - replace an unspecified destination by loopback (the v4-mapped
 *    loopback when the socket receive address is v4-mapped);
 *  - for a link-local destination, reconcile sin6_scope_id with
 *    sk_bound_dev_if and require a bound interface;
 *  - clear the cached timestamp state when the destination differs from
 *    the one stored in the socket;
 *  - for a v4-mapped destination, switch to the mapped operations and
 *    call tcp_v4_connect();
 *  - otherwise build the flow, resolve the route with
 *    ip6_dst_lookup_flow(), store it, move to TCP_SYN_SENT, hash the
 *    socket with inet6_hash_connect(), pick write_seq when it is unset
 *    and the socket is not in repair mode, and call tcp_connect().
 *
 * NOTE(review): several return statements and error labels are not
 * visible here; presumably the function returns 0 on success and a
 * negative errno otherwise - confirm.
 */
112 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
115 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
116 struct inet_sock *inet = inet_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 struct ipv6_pinfo *np = inet6_sk(sk);
119 struct tcp_sock *tp = tcp_sk(sk);
120 struct in6_addr *saddr = NULL, *final_p, final;
121 struct ipv6_txoptions *opt;
123 struct dst_entry *dst;
127 if (addr_len < SIN6_LEN_RFC2133)
130 if (usin->sin6_family != AF_INET6)
131 return -EAFNOSUPPORT;
133 memset(&fl6, 0, sizeof(fl6));
136 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
137 IP6_ECN_flow_init(fl6.flowlabel);
138 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
139 struct ip6_flowlabel *flowlabel;
140 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
143 fl6_sock_release(flowlabel);
148 * connect() to INADDR_ANY means loopback (BSD'ism).
151 if (ipv6_addr_any(&usin->sin6_addr)) {
152 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
153 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
156 usin->sin6_addr = in6addr_loopback;
159 addr_type = ipv6_addr_type(&usin->sin6_addr);
161 if (addr_type & IPV6_ADDR_MULTICAST)
164 if (addr_type&IPV6_ADDR_LINKLOCAL) {
165 if (addr_len >= sizeof(struct sockaddr_in6) &&
166 usin->sin6_scope_id) {
167 /* If interface is set while binding, indices
170 if (sk->sk_bound_dev_if &&
171 sk->sk_bound_dev_if != usin->sin6_scope_id)
174 sk->sk_bound_dev_if = usin->sin6_scope_id;
177 /* Connect to link-local address requires an interface */
178 if (!sk->sk_bound_dev_if)
182 if (tp->rx_opt.ts_recent_stamp &&
183 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
184 tp->rx_opt.ts_recent = 0;
185 tp->rx_opt.ts_recent_stamp = 0;
189 sk->sk_v6_daddr = usin->sin6_addr;
190 np->flow_label = fl6.flowlabel;
/* v4-mapped destination: hand the connect over to the IPv4 code. */
196 if (addr_type & IPV6_ADDR_MAPPED) {
197 u32 exthdrlen = icsk->icsk_ext_hdr_len;
198 struct sockaddr_in sin;
200 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
202 if (__ipv6_only_sock(sk))
205 sin.sin_family = AF_INET;
206 sin.sin_port = usin->sin6_port;
207 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
209 icsk->icsk_af_ops = &ipv6_mapped;
210 sk->sk_backlog_rcv = tcp_v4_do_rcv;
211 #ifdef CONFIG_TCP_MD5SIG
212 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
215 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/*
 * Put the IPv6 operations and the saved extension header length back.
 * NOTE(review): the guarding condition is not visible here; presumably
 * this only runs when tcp_v4_connect() failed - confirm.
 */
218 icsk->icsk_ext_hdr_len = exthdrlen;
219 icsk->icsk_af_ops = &ipv6_specific;
220 sk->sk_backlog_rcv = tcp_v6_do_rcv;
221 #ifdef CONFIG_TCP_MD5SIG
222 tp->af_specific = &tcp_sock_ipv6_specific;
226 np->saddr = sk->sk_v6_rcv_saddr;
231 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
232 saddr = &sk->sk_v6_rcv_saddr;
/* Describe the flow that is used for the route lookup. */
234 fl6.flowi6_proto = IPPROTO_TCP;
235 fl6.daddr = sk->sk_v6_daddr;
236 fl6.saddr = saddr ? *saddr : np->saddr;
237 fl6.flowi6_oif = sk->sk_bound_dev_if;
238 fl6.flowi6_mark = sk->sk_mark;
239 fl6.fl6_dport = usin->sin6_port;
240 fl6.fl6_sport = inet->inet_sport;
242 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
243 final_p = fl6_update_dst(&fl6, opt, &final);
245 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
247 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
255 sk->sk_v6_rcv_saddr = *saddr;
258 /* set the source address */
260 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
262 sk->sk_gso_type = SKB_GSO_TCPV6;
263 ip6_dst_store(sk, dst, NULL, NULL);
265 if (tcp_death_row.sysctl_tw_recycle &&
266 !tp->rx_opt.ts_recent_stamp &&
267 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
268 tcp_fetch_timewait_stamp(sk, dst);
270 icsk->icsk_ext_hdr_len = 0;
272 icsk->icsk_ext_hdr_len = opt->opt_flen +
275 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
277 inet->inet_dport = usin->sin6_port;
279 tcp_set_state(sk, TCP_SYN_SENT);
280 err = inet6_hash_connect(&tcp_death_row, sk);
/* Only choose a sequence number when none is set and repair mode is off. */
286 if (!tp->write_seq && likely(!tp->repair))
287 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
288 sk->sk_v6_daddr.s6_addr32,
292 err = tcp_connect(sk);
/*
 * Presumably the failure path (its labels are not visible here): the
 * socket goes back to TCP_CLOSE and the destination port and route
 * capabilities are cleared.
 */
299 tcp_set_state(sk, TCP_CLOSE);
302 inet->inet_dport = 0;
303 sk->sk_route_caps = 0;
/*
 * Apply the path MTU stored in tcp_sk(sk)->mtu_info to socket @sk.
 *
 * The socket state is first tested against LISTEN and CLOSE
 * (NOTE(review): the action taken is not visible here; presumably an
 * early return - confirm). An MTU whose MSS would be greater than or
 * equal to the current mss_cache is treated as an attempt to increase
 * the MSS. Otherwise the route is updated through
 * inet6_csk_update_pmtu() and, when the cached PMTU cookie is larger
 * than the MTU of the returned dst, the MSS is resynchronised and
 * tcp_simple_retransmit() is called.
 */
307 static void tcp_v6_mtu_reduced(struct sock *sk)
309 struct dst_entry *dst;
312 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
315 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
317 /* Drop requests trying to increase our current mss.
318 * Check done in __ip6_rt_update_pmtu() is too late.
320 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
323 dst = inet6_csk_update_pmtu(sk, mtu);
327 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
328 tcp_sync_mss(sk, dst_mtu(dst));
329 tcp_simple_retransmit(sk);
/*
 * ICMPv6 error handler for TCP.
 *
 * @skb holds the ICMPv6 payload: the IPv6 header of the offending packet
 * at skb->data and its TCP header at skb->data + @offset. @type, @code
 * and @info come from the ICMPv6 message.
 *
 * Visible processing:
 *  - find the socket with __inet6_lookup_established() using the
 *    addresses and ports of the embedded headers;
 *  - a TIME_WAIT socket is only released with inet_twsk_put();
 *  - a NEW_SYN_RECV entry is handed to tcp_req_err();
 *  - LINUX_MIB_LOCKDROPPEDICMPS is counted when the socket is owned by
 *    the user and the type is not ICMPV6_PKT_TOOBIG;
 *  - a hop limit below min_hopcount counts LINUX_MIB_TCPMINTTLDROP;
 *  - outside LISTEN, a sequence number that is not between snd_una and
 *    snd_nxt counts LINUX_MIB_OUTOFWINDOWICMPS;
 *  - NDISC_REDIRECT is passed to the redirect operation of the socket
 *    dst when the socket is not owned by the user;
 *  - ICMPV6_PKT_TOOBIG stores the new MTU in tp->mtu_info and either
 *    calls tcp_v6_mtu_reduced() directly or sets
 *    TCP_MTU_REDUCED_DEFERRED when the socket is owned by the user;
 *  - other errors are reported through sk_error_report() or kept in
 *    sk_err_soft, depending on state, ownership and np->recverr.
 *
 * NOTE(review): the jumps taken after the counters are bumped are not
 * visible here; presumably the message is dropped - confirm.
 */
333 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
334 u8 type, u8 code, int offset, __be32 info)
336 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
337 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
338 struct net *net = dev_net(skb->dev);
339 struct request_sock *fastopen;
340 struct ipv6_pinfo *np;
347 sk = __inet6_lookup_established(net, &tcp_hashinfo,
348 &hdr->daddr, th->dest,
349 &hdr->saddr, ntohs(th->source),
353 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
358 if (sk->sk_state == TCP_TIME_WAIT) {
359 inet_twsk_put(inet_twsk(sk));
362 seq = ntohl(th->seq);
363 fatal = icmpv6_err_convert(type, code, &err);
364 if (sk->sk_state == TCP_NEW_SYN_RECV)
365 return tcp_req_err(sk, seq, fatal);
368 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
369 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
371 if (sk->sk_state == TCP_CLOSE)
374 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
375 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
380 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
381 fastopen = tp->fastopen_rsk;
382 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
383 if (sk->sk_state != TCP_LISTEN &&
384 !between(seq, snd_una, tp->snd_nxt)) {
385 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
391 if (type == NDISC_REDIRECT) {
392 if (!sock_owned_by_user(sk)) {
393 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
396 dst->ops->redirect(dst, sk, skb);
401 if (type == ICMPV6_PKT_TOOBIG) {
402 u32 mtu = ntohl(info);
404 /* We are not interested in TCP_LISTEN and open_requests
405 * (SYN-ACKs sent out by Linux are always <576bytes so
406 * they should go through unfragmented).
408 if (sk->sk_state == TCP_LISTEN)
411 if (!ip6_sk_accept_pmtu(sk))
414 if (mtu < IPV6_MIN_MTU)
417 WRITE_ONCE(tp->mtu_info, mtu);
419 if (!sock_owned_by_user(sk))
420 tcp_v6_mtu_reduced(sk);
421 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
428 /* Might be for a request_sock */
429 switch (sk->sk_state) {
432 /* Only in fast or simultaneous open. If a fast open socket is
433 * already accepted it is treated as a connected one below.
435 if (fastopen && !fastopen->sk)
438 if (!sock_owned_by_user(sk)) {
440 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
444 sk->sk_err_soft = err;
448 if (!sock_owned_by_user(sk) && np->recverr) {
450 sk->sk_error_report(sk);
452 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for connection request @req on listener
 * @sk.
 *
 * When no @dst is passed in, a route is obtained with
 * inet6_csk_route_req(). The segment is created by tcp_make_synack(),
 * checksummed with the local and remote addresses of the request and
 * sent with ip6_xmit(); the transmit result goes through
 * net_xmit_eval().
 *
 * The flow label of the SYN stored in ireq->pktopts is reflected when
 * np->repflow is set. IPv6 tx options come from ireq->ipv6_opt or from
 * the listener (np->opt).
 *
 * NOTE(review): the failure branches and the condition selecting
 * between the two option sources are not visible here - confirm.
 */
460 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
462 struct request_sock *req,
463 struct tcp_fastopen_cookie *foc,
464 enum tcp_synack_type synack_type)
466 struct inet_request_sock *ireq = inet_rsk(req);
467 struct ipv6_pinfo *np = inet6_sk(sk);
468 struct ipv6_txoptions *opt;
469 struct flowi6 *fl6 = &fl->u.ip6;
473 /* First, grab a route. */
474 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
475 IPPROTO_TCP)) == NULL)
478 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
481 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
482 &ireq->ir_v6_rmt_addr);
484 fl6->daddr = ireq->ir_v6_rmt_addr;
485 if (np->repflow && ireq->pktopts)
486 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
489 opt = ireq->ipv6_opt;
491 opt = rcu_dereference(np->opt);
492 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
494 err = net_xmit_eval(err);
/*
 * Release the IPv6 specific resources attached to request sock @req:
 * the tx options (ipv6_opt) with kfree() and the saved packet options
 * skb (pktopts) with kfree_skb().
 */
502 static void tcp_v6_reqsk_destructor(struct request_sock *req)
504 kfree(inet_rsk(req)->ipv6_opt);
505 kfree_skb(inet_rsk(req)->pktopts);
508 #ifdef CONFIG_TCP_MD5SIG
/*
 * Look up the MD5 key configured on @sk for IPv6 peer address @addr.
 * Thin wrapper around tcp_md5_do_lookup() with family AF_INET6; its
 * result is returned unchanged.
 */
509 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
510 const struct in6_addr *addr)
512 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/*
 * Look up the MD5 key configured on @sk for the peer of @addr_sk, that
 * is for the address stored in addr_sk->sk_v6_daddr.
 */
515 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
516 const struct sock *addr_sk)
518 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/*
 * Add or delete an MD5 key on @sk from a user supplied struct
 * tcp_md5sig.
 *
 * @optval points to user memory; @optlen must be at least
 * sizeof(struct tcp_md5sig) and the embedded address must be AF_INET6.
 * A key length of zero deletes the key for the address, any other
 * length up to TCP_MD5SIG_MAXKEYLEN adds one (allocated with
 * GFP_KERNEL). For a v4-mapped address only the last 32 bits are used
 * as the key address.
 *
 * Returns the result of tcp_md5_do_del() or tcp_md5_do_add().
 * NOTE(review): the error codes of the validation checks are not
 * visible here - confirm.
 */
521 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
524 struct tcp_md5sig cmd;
525 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
527 if (optlen < sizeof(cmd))
530 if (copy_from_user(&cmd, optval, sizeof(cmd)))
533 if (sin6->sin6_family != AF_INET6)
/* A zero key length requests deletion of the key for this address. */
536 if (!cmd.tcpm_keylen) {
537 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
538 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
540 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
544 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
547 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
548 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
549 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
551 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
552 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/*
 * Feed the TCP pseudo-header and a copy of TCP header @th into the MD5
 * request of pool @hp.
 *
 * The pseudo-header carries IPPROTO_TCP and @nbytes as length. The copy
 * of @th is placed directly behind the pseudo-header, and both are
 * hashed as one scatterlist entry of
 * sizeof(pseudo-header) + sizeof(*th) bytes.
 *
 * Returns the result of crypto_ahash_update().
 * NOTE(review): the assignment of bp and the use of @daddr and @saddr
 * are not visible here - confirm.
 */
555 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
556 const struct in6_addr *daddr,
557 const struct in6_addr *saddr,
558 const struct tcphdr *th, int nbytes)
560 struct tcp6_pseudohdr *bp;
561 struct scatterlist sg;
565 /* 1. TCP pseudo-header (RFC2460) */
568 bp->protocol = cpu_to_be32(IPPROTO_TCP);
569 bp->len = cpu_to_be32(nbytes);
571 _th = (struct tcphdr *)(bp + 1);
572 memcpy(_th, th, sizeof(*th));
575 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
576 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
577 sizeof(*bp) + sizeof(*th));
578 return crypto_ahash_update(hp->md5_req);
/*
 * Compute the MD5 signature over the pseudo-header, TCP header @th and
 * @key, and write it to @md5_hash.
 *
 * The length placed in the pseudo-header is the TCP header length
 * (th->doff << 2). The MD5 pool is taken with tcp_get_md5sig_pool() and
 * released with tcp_put_md5sig_pool(). The trailing memset() zeroes the
 * 16 byte @md5_hash.
 *
 * NOTE(review): labels and return statements are not visible here;
 * presumably the memset() belongs to the failure path and the function
 * returns 0 on success and non-zero on failure - confirm.
 */
581 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
582 const struct in6_addr *daddr, struct in6_addr *saddr,
583 const struct tcphdr *th)
585 struct tcp_md5sig_pool *hp;
586 struct ahash_request *req;
588 hp = tcp_get_md5sig_pool();
590 goto clear_hash_noput;
593 if (crypto_ahash_init(req))
595 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
597 if (tcp_md5_hash_key(hp, key))
599 ahash_request_set_crypt(req, NULL, md5_hash, 0);
600 if (crypto_ahash_final(req))
603 tcp_put_md5sig_pool();
607 tcp_put_md5sig_pool();
609 memset(md5_hash, 0, 16);
/*
 * Compute the MD5 signature of the whole segment in @skb with @key and
 * write it to @md5_hash.
 *
 * The addresses for the pseudo-header come from @sk when it is not NULL
 * (sk_v6_rcv_saddr and sk_v6_daddr), otherwise from the IPv6 header of
 * @skb. The hash covers the pseudo-header with skb->len as length, the
 * TCP header, the skb data starting after the TCP header
 * (th->doff << 2) and finally the key.
 *
 * The trailing memset() zeroes the 16 byte @md5_hash.
 * NOTE(review): labels and return statements are not visible here;
 * presumably the memset() belongs to the failure path and the function
 * returns 0 on success and non-zero on failure - confirm.
 */
613 static int tcp_v6_md5_hash_skb(char *md5_hash,
614 const struct tcp_md5sig_key *key,
615 const struct sock *sk,
616 const struct sk_buff *skb)
618 const struct in6_addr *saddr, *daddr;
619 struct tcp_md5sig_pool *hp;
620 struct ahash_request *req;
621 const struct tcphdr *th = tcp_hdr(skb);
623 if (sk) { /* valid for establish/request sockets */
624 saddr = &sk->sk_v6_rcv_saddr;
625 daddr = &sk->sk_v6_daddr;
627 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
628 saddr = &ip6h->saddr;
629 daddr = &ip6h->daddr;
632 hp = tcp_get_md5sig_pool();
634 goto clear_hash_noput;
637 if (crypto_ahash_init(req))
640 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
642 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
644 if (tcp_md5_hash_key(hp, key))
646 ahash_request_set_crypt(req, NULL, md5_hash, 0);
647 if (crypto_ahash_final(req))
650 tcp_put_md5sig_pool();
654 tcp_put_md5sig_pool();
656 memset(md5_hash, 0, 16);
/*
 * Verify the TCP MD5 option of incoming segment @skb against the key
 * configured on @sk for the source address of the packet.
 *
 * Four cases are distinguished:
 *  - no key expected and no option present;
 *  - key expected but option missing: LINUX_MIB_TCPMD5NOTFOUND;
 *  - option present but no key expected: LINUX_MIB_TCPMD5UNEXPECTED;
 *  - both present: the hash is recomputed with tcp_v6_md5_hash_skb()
 *    and compared over 16 bytes. A computation failure or mismatch
 *    counts LINUX_MIB_TCPMD5FAILURE and logs a rate limited message.
 *
 * The body is only compiled with CONFIG_TCP_MD5SIG.
 * NOTE(review): the return statements are not visible here; the caller
 * drops the segment when this returns true - confirm the exact values.
 */
662 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
663 const struct sk_buff *skb)
665 #ifdef CONFIG_TCP_MD5SIG
666 const __u8 *hash_location = NULL;
667 struct tcp_md5sig_key *hash_expected;
668 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
669 const struct tcphdr *th = tcp_hdr(skb);
673 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
674 hash_location = tcp_parse_md5sig_option(th);
676 /* We've parsed the options - do we have a hash? */
677 if (!hash_expected && !hash_location)
680 if (hash_expected && !hash_location) {
681 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
685 if (!hash_expected && hash_location) {
686 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
690 /* check the signature */
691 genhash = tcp_v6_md5_hash_skb(newhash,
695 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
696 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
697 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
698 genhash ? "failed" : "mismatch",
699 &ip6h->saddr, ntohs(th->source),
700 &ip6h->daddr, ntohs(th->dest));
/*
 * Fill the IPv6 specific fields of request sock @req from the incoming
 * SYN in @skb.
 *
 * The remote and local addresses are copied from the IPv6 header. When
 * the listener is not bound to a device and the remote address is
 * link-local, the incoming interface is stored in ir_iif.
 *
 * An extra reference on @skb is taken (atomic_inc on skb->users) when
 * tcp_tw_isn is zero and either the packet carries options accepted by
 * the listener or one of the rxinfo, rxoinfo, rxhlim, rxohlim or
 * repflow settings is enabled.
 * NOTE(review): where that reference is stored is not visible here;
 * presumably in ireq->pktopts - confirm.
 */
707 static void tcp_v6_init_req(struct request_sock *req,
708 const struct sock *sk_listener,
711 struct inet_request_sock *ireq = inet_rsk(req);
712 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
714 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
715 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
717 /* So that link locals have meaning */
718 if (!sk_listener->sk_bound_dev_if &&
719 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
720 ireq->ir_iif = tcp_v6_iif(skb);
722 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
723 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
724 np->rxopt.bits.rxinfo ||
725 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
726 np->rxopt.bits.rxohlim || np->repflow)) {
727 atomic_inc(&skb->users);
/*
 * Resolve the route for answering connection request @req on listener
 * @sk. Returns the result of inet6_csk_route_req() for the IPv6 part of
 * the flow and protocol IPPROTO_TCP.
 * NOTE(review): part of the parameter list and any statements before
 * the return are not visible here - confirm.
 */
732 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
734 const struct request_sock *req,
739 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/*
 * Request sock operations for TCP over IPv6: object size of struct
 * tcp6_request_sock plus the callbacks for SYN-ACK retransmission, ACK
 * and reset transmission, destruction and SYN-ACK timeout.
 */
742 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
744 .obj_size = sizeof(struct tcp6_request_sock),
745 .rtx_syn_ack = tcp_rtx_synack,
746 .send_ack = tcp_v6_reqsk_send_ack,
747 .destructor = tcp_v6_reqsk_destructor,
748 .send_reset = tcp_v6_send_reset,
749 .syn_ack_timeout = tcp_syn_ack_timeout,
/*
 * IPv6 specific hooks used while processing a connection request.
 *
 * mss_clamp is IPV6_MIN_MTU minus the sizes of the TCP and IPv6
 * headers. The MD5 hooks are only set with CONFIG_TCP_MD5SIG and the
 * cookie sequence hook only with CONFIG_SYN_COOKIES.
 */
752 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
753 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
754 sizeof(struct ipv6hdr),
755 #ifdef CONFIG_TCP_MD5SIG
756 .req_md5_lookup = tcp_v6_md5_lookup,
757 .calc_md5_hash = tcp_v6_md5_hash_skb,
759 .init_req = tcp_v6_init_req,
760 #ifdef CONFIG_SYN_COOKIES
761 .cookie_init_seq = cookie_v6_init_sequence,
763 .route_req = tcp_v6_route_req,
764 .init_seq = tcp_v6_init_sequence,
765 .send_synack = tcp_v6_send_synack,
/*
 * Build and send a bare TCP segment (ACK or RST) in reply to @skb.
 *
 * @sk may be NULL; the network namespace is then taken from the dst
 * device of @skb. The reply is sent through the per namespace control
 * socket (net->ipv6.tcp_sk).
 *
 * @seq, @ack and @win fill the TCP header, with ports swapped relative
 * to @skb. @tsval and @tsecr go into a timestamp option and @key is
 * used for an MD5 signature option.
 * NOTE(review): the conditions guarding both options are not visible
 * here - confirm.
 * @rst selects a reset; the ack flag is set when this is not a reset or
 * when the incoming segment had no ack flag. @oif, @tclass and @label
 * select the output interface, traffic class and flow label.
 *
 * The reply flow uses the addresses of @skb swapped, is routed with
 * ip6_dst_lookup_flow() and sent with ip6_xmit(). TCP_MIB_OUTSEGS and
 * TCP_MIB_OUTRSTS are counted.
 * NOTE(review): the conditions around those counters and the failure
 * handling are not visible here - confirm.
 */
768 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
769 u32 ack, u32 win, u32 tsval, u32 tsecr,
770 int oif, struct tcp_md5sig_key *key, int rst,
771 u8 tclass, __be32 label)
773 const struct tcphdr *th = tcp_hdr(skb);
775 struct sk_buff *buff;
777 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
778 struct sock *ctl_sk = net->ipv6.tcp_sk;
779 unsigned int tot_len = sizeof(struct tcphdr);
780 struct dst_entry *dst;
784 tot_len += TCPOLEN_TSTAMP_ALIGNED;
785 #ifdef CONFIG_TCP_MD5SIG
787 tot_len += TCPOLEN_MD5SIG_ALIGNED;
790 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
795 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
797 t1 = (struct tcphdr *) skb_push(buff, tot_len);
798 skb_reset_transport_header(buff);
800 /* Swap the send and the receive. */
801 memset(t1, 0, sizeof(*t1));
802 t1->dest = th->source;
803 t1->source = th->dest;
804 t1->doff = tot_len / 4;
805 t1->seq = htonl(seq);
806 t1->ack_seq = htonl(ack);
807 t1->ack = !rst || !th->ack;
809 t1->window = htons(win);
811 topt = (__be32 *)(t1 + 1);
814 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
815 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
816 *topt++ = htonl(tsval);
817 *topt++ = htonl(tsecr);
820 #ifdef CONFIG_TCP_MD5SIG
822 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
823 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
824 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
825 &ipv6_hdr(skb)->saddr,
826 &ipv6_hdr(skb)->daddr, t1);
830 memset(&fl6, 0, sizeof(fl6));
831 fl6.daddr = ipv6_hdr(skb)->saddr;
832 fl6.saddr = ipv6_hdr(skb)->daddr;
833 fl6.flowlabel = label;
835 buff->ip_summed = CHECKSUM_PARTIAL;
838 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
840 fl6.flowi6_proto = IPPROTO_TCP;
841 if (rt6_need_strict(&fl6.daddr) && !oif)
842 fl6.flowi6_oif = tcp_v6_iif(skb);
844 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
847 fl6.flowi6_oif = oif;
850 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
851 fl6.fl6_dport = t1->dest;
852 fl6.fl6_sport = t1->source;
853 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
855 /* Pass a socket to ip6_dst_lookup either it is for RST
856 * Underlying function will use this to retrieve the network
859 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
861 skb_dst_set(buff, dst);
862 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
863 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
865 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
/*
 * Send a RST in reply to @skb. @sk may be NULL when no socket matched.
 *
 * Without a socket, the destination of @skb is checked with
 * ipv6_unicast_destination().
 *
 * With CONFIG_TCP_MD5SIG the key is looked up on @sk when it is a full
 * socket. Otherwise, if the segment carries an MD5 option, a listening
 * socket is searched with inet6_lookup_listener() and its key is used
 * to verify the hash of the incoming segment.
 *
 * The sequence and acknowledgment numbers of the reset are derived from
 * the incoming header. The reset is emitted through
 * tcp_v6_send_response() with rst set to 1, zero window, timestamps,
 * traffic class and flow label, and the bound device of @sk (or 0) as
 * output interface.
 *
 * NOTE(review): early returns and the conditions selecting seq versus
 * ack_seq are not visible here - confirm.
 */
872 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
874 const struct tcphdr *th = tcp_hdr(skb);
875 u32 seq = 0, ack_seq = 0;
876 struct tcp_md5sig_key *key = NULL;
877 #ifdef CONFIG_TCP_MD5SIG
878 const __u8 *hash_location = NULL;
879 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
880 unsigned char newhash[16];
882 struct sock *sk1 = NULL;
889 /* If sk not NULL, it means we did a successful lookup and incoming
890 * route had to be correct. prequeue might have dropped our dst.
892 if (!sk && !ipv6_unicast_destination(skb))
895 #ifdef CONFIG_TCP_MD5SIG
897 hash_location = tcp_parse_md5sig_option(th);
898 if (sk && sk_fullsock(sk)) {
899 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
900 } else if (hash_location) {
902 * active side is lost. Try to find listening socket through
903 * source port, and then find md5 key through listening socket.
904 * we are not loose security here:
905 * Incoming packet is checked with md5 hash with finding key,
906 * no RST generated if md5 hash doesn't match.
908 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
909 &tcp_hashinfo, NULL, 0,
911 th->source, &ipv6h->daddr,
912 ntohs(th->source), tcp_v6_iif(skb));
916 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
920 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
921 if (genhash || memcmp(hash_location, newhash, 16) != 0)
927 seq = ntohl(th->ack_seq);
929 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
932 oif = sk ? sk->sk_bound_dev_if : 0;
933 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
935 #ifdef CONFIG_TCP_MD5SIG
/*
 * Send a bare ACK in reply to @skb. Forwards all arguments to
 * tcp_v6_send_response() with the rst argument set to 0.
 */
941 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
942 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
943 struct tcp_md5sig_key *key, u8 tclass,
946 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
/*
 * Send an ACK on behalf of time-wait socket @sk in reply to @skb.
 *
 * Sequence numbers, the scaled receive window, timestamp offset and
 * echo value, bound device, MD5 key, traffic class and flow label are
 * all taken from the time-wait socket.
 */
950 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
952 struct inet_timewait_sock *tw = inet_twsk(sk);
953 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
955 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
956 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
957 tcp_time_stamp + tcptw->tw_ts_offset,
958 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
959 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
/*
 * Send an ACK for connection request @req in reply to @skb.
 *
 * The sequence number is snt_isn + 1 when @sk is in TCP_LISTEN and the
 * snd_nxt of @sk otherwise. The advertised window is the request
 * receive window shifted right by the request window scale. The MD5 key
 * is looked up on @sk for the source address of @skb.
 */
964 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
965 struct request_sock *req)
967 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
968 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
971 * The window field (SEG.WND) of every outgoing segment, with the
972 * exception of <SYN> segments, MUST be right-shifted by
973 * Rcv.Wind.Shift bits:
975 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
976 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
977 tcp_rsk(req)->rcv_nxt,
978 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
979 tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
980 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
/*
 * SYN cookie hook for listener @sk. With CONFIG_SYN_COOKIES, @sk may be
 * replaced by the result of cookie_v6_check() for @skb.
 * NOTE(review): the condition on the TCP header that guards the call
 * and the return statement are not visible here - confirm.
 */
985 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
987 #ifdef CONFIG_SYN_COOKIES
988 const struct tcphdr *th = tcp_hdr(skb);
991 sk = cookie_v6_check(sk, skb);
/*
 * Handle an incoming connection request (SYN) on listener @sk.
 *
 * IPv4 packets are passed to tcp_v4_conn_request(). For IPv6 the
 * destination is checked with ipv6_unicast_destination(), and a
 * v4-mapped source address counts IPSTATS_MIB_INHDRERRORS. Everything
 * else is processed by tcp_conn_request() with the IPv6 operation
 * tables.
 *
 * NOTE(review): the statements taken when a check fails are not visible
 * here; presumably the SYN is dropped, ending in the final return of 0
 * - confirm.
 */
996 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
998 if (skb->protocol == htons(ETH_P_IP))
999 return tcp_v4_conn_request(sk, skb);
1001 if (!ipv6_unicast_destination(skb))
1004 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1005 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1009 return tcp_conn_request(&tcp6_request_sock_ops,
1010 &tcp_request_sock_ipv6_ops, sk, skb);
1014 return 0; /* don't send reset */
/*
 * Copy the struct inet6_skb_parm saved in TCP_SKB_CB(skb)->header.h6
 * back to IP6CB(skb). memmove() is used for the copy.
 */
1017 static void tcp_v6_restore_cb(struct sk_buff *skb)
1019 /* We need to move header back to the beginning if xfrm6_policy_check()
1020 * and tcp_v6_fill_cb() are going to be called again.
1021 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1023 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1024 sizeof(struct inet6_skb_parm));
/*
 * Create the child socket for connection request @req on listener @sk
 * once the handshake has completed with @skb.
 *
 * IPv4 packet (skb->protocol is ETH_P_IP): the child is created by
 * tcp_v4_syn_recv_sock() and then set up as a v4-mapped IPv6 socket.
 * The listener ipv6_pinfo is copied, the mapped operation tables and
 * tcp_v4_do_rcv() are installed, the list pointers and pktoptions are
 * cleared, mcast_oif and mcast_hops come from the IPv4 packet and the
 * MSS is resynchronised.
 *
 * IPv6 packet: after the accept queue check a route is obtained with
 * inet6_csk_route_req() and the child is created with
 * tcp_create_openreq_child(). The addresses and bound device come from
 * the request, tx options are duplicated with ipv6_dup_options(), MSS
 * values are derived from the route, the MD5 key for the peer is
 * copied, the port is inherited and the child is inserted with
 * inet_ehash_nolisten(), whose result is written to *own_req. The
 * packet options saved with the SYN are cloned onto the child.
 *
 * LINUX_MIB_LISTENOVERFLOWS is counted.
 * NOTE(review): the label guarding that counter, the conditions around
 * dst lookup, option selection and pktoptions cloning, and all return
 * statements are not visible here; presumably the new socket is
 * returned on success and NULL on failure - confirm.
 */
1027 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1028 struct request_sock *req,
1029 struct dst_entry *dst,
1030 struct request_sock *req_unhash,
1033 struct inet_request_sock *ireq;
1034 struct ipv6_pinfo *newnp;
1035 const struct ipv6_pinfo *np = inet6_sk(sk);
1036 struct ipv6_txoptions *opt;
1037 struct tcp6_sock *newtcp6sk;
1038 struct inet_sock *newinet;
1039 struct tcp_sock *newtp;
1041 #ifdef CONFIG_TCP_MD5SIG
1042 struct tcp_md5sig_key *key;
1046 if (skb->protocol == htons(ETH_P_IP)) {
1051 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1052 req_unhash, own_req);
1057 newtcp6sk = (struct tcp6_sock *)newsk;
1058 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1060 newinet = inet_sk(newsk);
1061 newnp = inet6_sk(newsk);
1062 newtp = tcp_sk(newsk);
1064 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1066 newnp->saddr = newsk->sk_v6_rcv_saddr;
1068 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1069 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1070 #ifdef CONFIG_TCP_MD5SIG
1071 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1074 newnp->ipv6_mc_list = NULL;
1075 newnp->ipv6_ac_list = NULL;
1076 newnp->ipv6_fl_list = NULL;
1077 newnp->pktoptions = NULL;
1079 newnp->mcast_oif = inet_iif(skb);
1080 newnp->mcast_hops = ip_hdr(skb)->ttl;
1081 newnp->rcv_flowinfo = 0;
1083 newnp->flow_label = 0;
1086 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1087 * here, tcp_create_openreq_child now does this for us, see the comment in
1088 * that function for the gory details. -acme
1091 /* It is tricky place. Until this moment IPv4 tcp
1092 worked with IPv6 icsk.icsk_af_ops.
1095 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1100 ireq = inet_rsk(req);
1102 if (sk_acceptq_is_full(sk))
1106 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1111 newsk = tcp_create_openreq_child(sk, req, skb);
1116 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1117 * count here, tcp_create_openreq_child now does this for us, see the
1118 * comment in that function for the gory details. -acme
1121 newsk->sk_gso_type = SKB_GSO_TCPV6;
1122 ip6_dst_store(newsk, dst, NULL, NULL);
1123 inet6_sk_rx_dst_set(newsk, skb);
1125 newtcp6sk = (struct tcp6_sock *)newsk;
1126 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1128 newtp = tcp_sk(newsk);
1129 newinet = inet_sk(newsk);
1130 newnp = inet6_sk(newsk);
1132 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1134 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1135 newnp->saddr = ireq->ir_v6_loc_addr;
1136 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1137 newsk->sk_bound_dev_if = ireq->ir_iif;
1139 /* Now IPv6 options...
1141 First: no IPv4 options.
1143 newinet->inet_opt = NULL;
1144 newnp->ipv6_mc_list = NULL;
1145 newnp->ipv6_ac_list = NULL;
1146 newnp->ipv6_fl_list = NULL;
1149 newnp->rxopt.all = np->rxopt.all;
1151 newnp->pktoptions = NULL;
1153 newnp->mcast_oif = tcp_v6_iif(skb);
1154 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1155 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1157 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1159 /* Clone native IPv6 options from listening socket (if any)
1161 Yes, keeping reference count would be much more clever,
1162 but we make one more one thing there: reattach optmem
1165 opt = ireq->ipv6_opt;
1167 opt = rcu_dereference(np->opt);
1169 opt = ipv6_dup_options(newsk, opt);
1170 RCU_INIT_POINTER(newnp->opt, opt);
1172 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1174 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1177 tcp_ca_openreq_child(newsk, dst);
1179 tcp_sync_mss(newsk, dst_mtu(dst));
1180 newtp->advmss = dst_metric_advmss(dst);
1181 if (tcp_sk(sk)->rx_opt.user_mss &&
1182 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1183 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1185 tcp_initialize_rcv_mss(newsk);
1187 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1188 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1190 #ifdef CONFIG_TCP_MD5SIG
1191 /* Copy over the MD5 key from the original socket */
1192 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1194 /* We're using one, so create a matching key
1195 * on the newsk structure. If we fail to get
1196 * memory, then we end up not copying the key
1199 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1200 AF_INET6, key->key, key->keylen,
1201 sk_gfp_mask(sk, GFP_ATOMIC));
1205 if (__inet_inherit_port(sk, newsk) < 0) {
1206 inet_csk_prepare_forced_close(newsk);
1210 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1212 tcp_move_syn(newtp, req);
1214 /* Clone pktoptions received with SYN, if we own the req */
1215 if (ireq->pktopts) {
1216 newnp->pktoptions = skb_clone(ireq->pktopts,
1217 sk_gfp_mask(sk, GFP_ATOMIC));
1218 consume_skb(ireq->pktopts);
1219 ireq->pktopts = NULL;
1220 if (newnp->pktoptions) {
1221 tcp_v6_restore_cb(newnp->pktoptions);
1222 skb_set_owner_r(newnp->pktoptions, newsk);
1230 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
/*
 * tcp_v6_do_rcv - process segment @skb on socket @sk.
 *
 * IPv4 packets are passed straight to tcp_v4_do_rcv() and segments
 * rejected by tcp_filter() are not processed further. A clone of @skb
 * may be kept in opt_skb so that its IPv6 packet options can be latched
 * on the socket afterwards.
 *
 * Dispatch by socket state:
 *  - ESTABLISHED: the cached receive dst is cleared when the incoming
 *    interface differs or its check operation fails, then
 *    tcp_rcv_established() runs;
 *  - LISTEN: tcp_v6_cookie_check() may supply a child socket, which is
 *    processed with tcp_child_process();
 *  - other states: tcp_rcv_state_process().
 *
 * Segments failing tcp_checksum_complete() count TCP_MIB_CSUMERRORS and
 * TCP_MIB_INERRS. A reset is sent with tcp_v6_send_reset().
 *
 * In the packet options tail, when the clone ends at tp->rcv_nxt and
 * the socket is neither CLOSE nor LISTEN, the requested receive
 * information (interface, hop limit, flow info, flow label) is updated
 * from the clone, and the clone is stored in np->pktoptions when it
 * carries accepted options.
 *
 * NOTE(review): labels, return values and several guarding conditions
 * are not visible here - confirm.
 */
1238 /* The socket must have it's spinlock held when we get
1239 * here, unless it is a TCP_LISTEN socket.
1241 * We have a potential double-lock case here, so even when
1242 * doing backlog processing we use the BH locking scheme.
1243 * This is because we cannot sleep with the original spinlock
1246 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1248 struct ipv6_pinfo *np = inet6_sk(sk);
1249 struct tcp_sock *tp;
1250 struct sk_buff *opt_skb = NULL;
1252 /* Imagine: socket is IPv6. IPv4 packet arrives,
1253 goes to IPv4 receive handler and backlogged.
1254 From backlog it always goes here. Kerboom...
1255 Fortunately, tcp_rcv_established and rcv_established
1256 handle them correctly, but it is not case with
1257 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1260 if (skb->protocol == htons(ETH_P_IP))
1261 return tcp_v4_do_rcv(sk, skb);
1263 if (tcp_filter(sk, skb))
1267 * socket locking is here for SMP purposes as backlog rcv
1268 * is currently called with bh processing disabled.
1271 /* Do Stevens' IPV6_PKTOPTIONS.
1273 Yes, guys, it is the only place in our code, where we
1274 may make it not affecting IPv4.
1275 The rest of code is protocol independent,
1276 and I do not like idea to uglify IPv4.
1278 Actually, all the idea behind IPV6_PKTOPTIONS
1279 looks not very well thought. For now we latch
1280 options, received in the last packet, enqueued
1281 by tcp. Feel free to propose better solution.
1285 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1287 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1288 struct dst_entry *dst = sk->sk_rx_dst;
1290 sock_rps_save_rxhash(sk, skb);
1291 sk_mark_napi_id(sk, skb);
1293 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1294 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1296 sk->sk_rx_dst = NULL;
1300 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1302 goto ipv6_pktoptions;
1306 if (tcp_checksum_complete(skb))
1309 if (sk->sk_state == TCP_LISTEN) {
1310 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1316 sock_rps_save_rxhash(nsk, skb);
1317 sk_mark_napi_id(nsk, skb);
1318 if (tcp_child_process(sk, nsk, skb))
1321 __kfree_skb(opt_skb);
1325 sock_rps_save_rxhash(sk, skb);
1327 if (tcp_rcv_state_process(sk, skb))
1330 goto ipv6_pktoptions;
1334 tcp_v6_send_reset(sk, skb);
1337 __kfree_skb(opt_skb);
1341 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1342 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1347 /* Do you ask, what is it?
1349 1. skb was enqueued by tcp.
1350 2. skb is added to tail of read queue, rather than out of order.
1351 3. socket is not in passive state.
1352 4. Finally, it really contains options, which user wants to receive.
1355 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1356 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1357 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1358 np->mcast_oif = tcp_v6_iif(opt_skb);
1359 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1360 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1361 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1362 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1364 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1365 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1366 skb_set_owner_r(opt_skb, sk);
1367 tcp_v6_restore_cb(opt_skb);
1368 opt_skb = xchg(&np->pktoptions, opt_skb);
1370 __kfree_skb(opt_skb);
1371 opt_skb = xchg(&np->pktoptions, NULL);
// tcp_v6_fill_cb - fill TCP_SKB_CB(skb) from the IPv6 and TCP headers.
// @skb: received segment whose control block is written
// @hdr: IPv6 header of the segment; only its DS field is read here
// @th:  TCP header of the segment (seq, ack_seq, flag byte, syn, fin, doff)
// Returns nothing; every result is stored in TCP_SKB_CB(skb).
1379 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1380 const struct tcphdr *th)
1382 /* This is tricky: we move IP6CB at its correct location into
1383 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1384 * _decode_session6() uses IP6CB().
1385 * barrier() makes sure compiler won't play aliasing games.
// NOTE(review): memmove rather than memcpy, presumably because IP6CB(skb)
// and TCP_SKB_CB(skb)->header.h6 can overlap - confirm.
1387 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1388 sizeof(struct inet6_skb_parm));
// Sequence numbers are converted from network to host byte order.
1391 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
// end_seq = seq + one for SYN + one for FIN + (skb->len minus the TCP
// header length, which is doff 32-bit words).
1392 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1393 skb->len - th->doff*4);
1394 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1395 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
// tcp_tw_isn and sacked always start out as zero for a fresh segment.
1396 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1397 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1398 TCP_SKB_CB(skb)->sacked = 0;
// tcp_v6_rcv - receive handler for TCP segments arriving over IPv6
// (registered as the .handler member of tcpv6_protocol).
// @skb: the incoming segment
// Validates the TCP header, looks up the owning socket and dispatches on
// its state. Returns an int; the visible return statement yields -1 when
// ret is nonzero and 0 otherwise.
1401 static int tcp_v6_rcv(struct sk_buff *skb)
1403 const struct tcphdr *th;
1404 const struct ipv6hdr *hdr;
1408 struct net *net = dev_net(skb->dev);
1410 if (skb->pkt_type != PACKET_HOST)
1414 * Count it even if it's bad.
1416 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
// The fixed-size TCP header must be pullable before th is read.
1418 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1421 th = (const struct tcphdr *)skb->data;
// doff counts 32-bit words (it is multiplied by 4 below), so this rejects
// a header length smaller than struct tcphdr.
1423 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1425 if (!pskb_may_pull(skb, th->doff*4))
1428 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
// th is assigned again after the second pull and the checksum init;
// NOTE(review): presumably because skb->data may have moved - confirm.
1431 th = (const struct tcphdr *)skb->data;
1432 hdr = ipv6_hdr(skb);
1435 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1436 th->source, th->dest, inet6_iif(skb),
1442 if (sk->sk_state == TCP_TIME_WAIT)
// Request socks (TCP_NEW_SYN_RECV): processing continues on the listener
// taken from req->rsk_listener.
1445 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1446 struct request_sock *req = inet_reqsk(sk);
1449 sk = req->rsk_listener;
1450 tcp_v6_fill_cb(skb, hdr, th);
1451 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1452 sk_drops_add(sk, skb);
1456 if (tcp_checksum_complete(skb)) {
// If the listener is no longer in TCP_LISTEN the request is dropped from
// its queue.
1460 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1461 inet_csk_reqsk_queue_drop_and_put(sk, req);
1466 nsk = tcp_check_req(sk, skb, req, false);
1469 goto discard_and_relse;
1473 tcp_v6_restore_cb(skb);
// A nonzero result from tcp_child_process() triggers a reset sent on
// behalf of the child socket nsk.
1474 } else if (tcp_child_process(sk, nsk, skb)) {
1475 tcp_v6_send_reset(nsk, skb);
1476 goto discard_and_relse;
// Hop limit below the per-socket min_hopcount: count TCPMINTTLDROP and
// discard.
1482 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1483 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1484 goto discard_and_relse;
1487 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1488 goto discard_and_relse;
// The control block is filled only after xfrm6_policy_check() has run.
1490 tcp_v6_fill_cb(skb, hdr, th);
1492 if (tcp_v6_inbound_md5_hash(sk, skb))
1493 goto discard_and_relse;
1495 if (tcp_filter(sk, skb))
1496 goto discard_and_relse;
// Header pointers are reloaded after tcp_filter(); NOTE(review):
// presumably the filter can modify the skb - confirm.
1497 th = (const struct tcphdr *)skb->data;
1498 hdr = ipv6_hdr(skb);
// Listeners go straight to tcp_v6_do_rcv() and then to put_and_return.
1502 if (sk->sk_state == TCP_LISTEN) {
1503 ret = tcp_v6_do_rcv(sk, skb);
1504 goto put_and_return;
1507 sk_incoming_cpu_update(sk);
1509 bh_lock_sock_nested(sk);
1510 tcp_segs_in(tcp_sk(sk), skb);
// Not owned by a user context: process now unless tcp_prequeue() takes the
// segment. Otherwise try the backlog, discarding if tcp_add_backlog()
// returns nonzero.
1512 if (!sock_owned_by_user(sk)) {
1513 if (!tcp_prequeue(sk, skb))
1514 ret = tcp_v6_do_rcv(sk, skb);
1515 } else if (tcp_add_backlog(sk, skb)) {
1516 goto discard_and_relse;
1523 return ret ? -1 : 0;
// The policy check and the reset below are both done with a NULL socket.
1526 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1529 tcp_v6_fill_cb(skb, hdr, th);
1531 if (tcp_checksum_complete(skb)) {
1533 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1535 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1537 tcp_v6_send_reset(NULL, skb);
1545 sk_drops_add(sk, skb);
// Each early exit visible on the timewait path drops the timewait
// reference with inet_twsk_put().
1551 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1552 inet_twsk_put(inet_twsk(sk));
1556 tcp_v6_fill_cb(skb, hdr, th);
1558 if (tcp_checksum_complete(skb)) {
1559 inet_twsk_put(inet_twsk(sk));
// Act on the verdict returned by the timewait state processing.
1563 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
// Listener lookup keyed on the addresses and ports of this segment.
1568 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1569 skb, __tcp_hdrlen(th),
1570 &ipv6_hdr(skb)->saddr, th->source,
1571 &ipv6_hdr(skb)->daddr,
1572 ntohs(th->dest), tcp_v6_iif(skb));
1574 struct inet_timewait_sock *tw = inet_twsk(sk);
1575 inet_twsk_deschedule_put(tw);
1577 tcp_v6_restore_cb(skb);
1581 /* Fall through to ACK */
1584 tcp_v6_timewait_ack(sk, skb);
// The control block is restored before the reset is sent, and the timewait
// sock is then descheduled and released.
1587 tcp_v6_restore_cb(skb);
1588 tcp_v6_send_reset(sk, skb);
1589 inet_twsk_deschedule_put(inet_twsk(sk));
1591 case TCP_TW_SUCCESS:
// tcp_v6_early_demux - early socket lookup for an incoming TCP/IPv6 segment
// (registered as the .early_demux member of tcpv6_protocol).
// @skb: the incoming segment
// Looks up an established socket from the addresses and ports in the
// headers. On the visible path it installs sock_edemux as the skb
// destructor and, for a full socket, may attach the cached sk_rx_dst.
1597 static void tcp_v6_early_demux(struct sk_buff *skb)
1599 const struct ipv6hdr *hdr;
1600 const struct tcphdr *th;
1603 if (skb->pkt_type != PACKET_HOST)
// The pull length is measured from skb->data, hence the transport offset
// is added to the TCP header size.
1606 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1609 hdr = ipv6_hdr(skb);
// doff below the size of struct tcphdr in 32-bit words is singled out.
1612 if (th->doff < sizeof(struct tcphdr) / 4)
1615 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1616 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1617 &hdr->saddr, th->source,
1618 &hdr->daddr, ntohs(th->dest),
1622 skb->destructor = sock_edemux;
1623 if (sk_fullsock(sk)) {
1624 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
// dst_check() is given the cookie stored on the socket; NOTE(review):
// presumably this revalidates the cached route - confirm.
1627 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
// The dst is attached only when rx_dst_ifindex equals the input interface
// of this skb.
1629 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1630 skb_dst_set_noref(skb, dst);
// Timewait-sock operations for TCP over IPv6 (referenced by the .twsk_prot
// member of tcpv6_prot): the object size is that of struct
// tcp6_timewait_sock, and the unique and destructor callbacks are the
// tcp_twsk_* helpers.
1635 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1636 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1637 .twsk_unique = tcp_twsk_unique,
1638 .twsk_destructor = tcp_twsk_destructor,
// Address-family operations for native TCP/IPv6 sockets; tcp_v6_init_sock
// assigns this table to icsk_af_ops. Header lengths are those of struct
// ipv6hdr and struct frag_hdr, and the sockaddr length is that of struct
// sockaddr_in6. The compat sockopt handlers exist only under
// CONFIG_COMPAT.
1641 static const struct inet_connection_sock_af_ops ipv6_specific = {
1642 .queue_xmit = inet6_csk_xmit,
1643 .send_check = tcp_v6_send_check,
1644 .rebuild_header = inet6_sk_rebuild_header,
1645 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1646 .conn_request = tcp_v6_conn_request,
1647 .syn_recv_sock = tcp_v6_syn_recv_sock,
1648 .net_header_len = sizeof(struct ipv6hdr),
1649 .net_frag_header_len = sizeof(struct frag_hdr),
1650 .setsockopt = ipv6_setsockopt,
1651 .getsockopt = ipv6_getsockopt,
1652 .addr2sockaddr = inet6_csk_addr2sockaddr,
1653 .sockaddr_len = sizeof(struct sockaddr_in6),
1654 .bind_conflict = inet6_csk_bind_conflict,
1655 #ifdef CONFIG_COMPAT
1656 .compat_setsockopt = compat_ipv6_setsockopt,
1657 .compat_getsockopt = compat_ipv6_getsockopt,
1659 .mtu_reduced = tcp_v6_mtu_reduced,
// MD5 signature callbacks for native IPv6 TCP sockets, compiled only when
// CONFIG_TCP_MD5SIG is defined; tcp_v6_init_sock stores this table in
// af_specific. All three callbacks are the tcp_v6_* variants.
1662 #ifdef CONFIG_TCP_MD5SIG
1663 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1664 .md5_lookup = tcp_v6_md5_lookup,
1665 .calc_md5_hash = tcp_v6_md5_hash_skb,
1666 .md5_parse = tcp_v6_parse_md5_keys,
// Address-family operations for IPv4 traffic carried on an AF_INET6 socket.
// Transmit, checksum, header rebuild, rx dst caching and MTU reduction use
// the IPv4 helpers and net_header_len is sizeof(struct iphdr), while the
// sockopt, sockaddr and bind-conflict handlers stay the IPv6 ones.
// NOTE(review): unlike ipv6_specific, no net_frag_header_len is set here.
1671 * TCP over IPv4 via INET6 API
1673 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1674 .queue_xmit = ip_queue_xmit,
1675 .send_check = tcp_v4_send_check,
1676 .rebuild_header = inet_sk_rebuild_header,
1677 .sk_rx_dst_set = inet_sk_rx_dst_set,
1678 .conn_request = tcp_v6_conn_request,
1679 .syn_recv_sock = tcp_v6_syn_recv_sock,
1680 .net_header_len = sizeof(struct iphdr),
1681 .setsockopt = ipv6_setsockopt,
1682 .getsockopt = ipv6_getsockopt,
1683 .addr2sockaddr = inet6_csk_addr2sockaddr,
1684 .sockaddr_len = sizeof(struct sockaddr_in6),
1685 .bind_conflict = inet6_csk_bind_conflict,
1686 #ifdef CONFIG_COMPAT
1687 .compat_setsockopt = compat_ipv6_setsockopt,
1688 .compat_getsockopt = compat_ipv6_getsockopt,
1690 .mtu_reduced = tcp_v4_mtu_reduced,
// MD5 signature callbacks for the v4-mapped case, compiled only when
// CONFIG_TCP_MD5SIG is defined: key lookup and hashing use the tcp_v4_*
// helpers, while key parsing still uses tcp_v6_parse_md5_keys.
1693 #ifdef CONFIG_TCP_MD5SIG
1694 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1695 .md5_lookup = tcp_v4_md5_lookup,
1696 .calc_md5_hash = tcp_v4_md5_hash_skb,
1697 .md5_parse = tcp_v6_parse_md5_keys,
// tcp_v6_init_sock - socket init hook (the .init member of tcpv6_prot).
// @sk: socket being initialised
// Selects the native IPv6 af ops for the connection sock and, when
// CONFIG_TCP_MD5SIG is defined, the IPv6 MD5 ops. Returns an int status.
1701 /* NOTE: A lot of things set to zero explicitly by call to
1702 * sk_alloc() so need not be done here.
1704 static int tcp_v6_init_sock(struct sock *sk)
1706 struct inet_connection_sock *icsk = inet_csk(sk);
1710 icsk->icsk_af_ops = &ipv6_specific;
1712 #ifdef CONFIG_TCP_MD5SIG
1713 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
// tcp_v6_destroy_sock - socket destroy hook (the .destroy member of
// tcpv6_prot).
// @sk: socket being torn down
// Reuses tcp_v4_destroy_sock() for the TCP part first, then calls
// inet6_destroy_sock().
1719 static void tcp_v6_destroy_sock(struct sock *sk)
1721 tcp_v4_destroy_sock(sk);
1722 inet6_destroy_sock(sk);
1725 #ifdef CONFIG_PROC_FS
1726 /* Proc filesystem TCPv6 sock list dumping. */
// get_openreq6 - format one listing line for a pending connection request.
// @seq: seq_file receiving the output
// @req: request sock to describe
// @i:   entry number passed in by the caller
// Called from tcp6_seq_show for socks in TCP_NEW_SYN_RECV state.
1727 static void get_openreq6(struct seq_file *seq,
1728 const struct request_sock *req, int i)
// Jiffies remaining until the request timer expires.
1730 long ttd = req->rsk_timer.expires - jiffies;
1731 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1732 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1738 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1739 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1741 src->s6_addr32[0], src->s6_addr32[1],
1742 src->s6_addr32[2], src->s6_addr32[3],
// ir_num is printed as stored while the remote port goes through ntohs();
// NOTE(review): presumably ir_num is already in host byte order - confirm.
1743 inet_rsk(req)->ir_num,
1744 dest->s6_addr32[0], dest->s6_addr32[1],
1745 dest->s6_addr32[2], dest->s6_addr32[3],
1746 ntohs(inet_rsk(req)->ir_rmt_port),
1748 0, 0, /* could print option size, but that is af dependent. */
1749 1, /* timers active (only the expire timer) */
1750 jiffies_to_clock_t(ttd),
// The uid reported is that of the listener owning the request.
1752 from_kuid_munged(seq_user_ns(seq),
1753 sock_i_uid(req->rsk_listener)),
1754 0, /* non standard timer */
1755 0, /* open_requests have no inode */
// get_tcp6_sock - format one listing line for a full TCP/IPv6 socket.
// @seq: seq_file receiving the output
// @sp:  socket to describe
// @i:   entry number passed in by the caller
// Called from tcp6_seq_show for socks that are neither in TCP_TIME_WAIT
// nor in TCP_NEW_SYN_RECV state.
1759 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1761 const struct in6_addr *dest, *src;
1764 unsigned long timer_expires;
1765 const struct inet_sock *inet = inet_sk(sp);
1766 const struct tcp_sock *tp = tcp_sk(sp);
1767 const struct inet_connection_sock *icsk = inet_csk(sp);
1768 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1772 dest = &sp->sk_v6_daddr;
1773 src = &sp->sk_v6_rcv_saddr;
// Ports are converted to host byte order for printing.
1774 destp = ntohs(inet->inet_dport);
1775 srcp = ntohs(inet->inet_sport);
// Choose the expiry time to report: icsk_timeout for the retransmit, early
// retransmit, loss probe and zero-window probe timers, the sk_timer expiry
// when that timer is pending, and the current jiffies otherwise.
1777 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1778 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
1779 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1781 timer_expires = icsk->icsk_timeout;
1782 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1784 timer_expires = icsk->icsk_timeout;
1785 } else if (timer_pending(&sp->sk_timer)) {
1787 timer_expires = sp->sk_timer.expires;
1790 timer_expires = jiffies;
// rx_queue is the accept backlog for a listener; for other states it is
// rcv_nxt minus copied_seq, clamped at zero.
1793 state = sk_state_load(sp);
1794 if (state == TCP_LISTEN)
1795 rx_queue = sp->sk_ack_backlog;
1797 /* Because we don't lock the socket,
1798 * we might find a transient negative value.
1800 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1803 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1804 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1806 src->s6_addr32[0], src->s6_addr32[1],
1807 src->s6_addr32[2], src->s6_addr32[3], srcp,
1808 dest->s6_addr32[0], dest->s6_addr32[1],
1809 dest->s6_addr32[2], dest->s6_addr32[3], destp,
// tx_queue column: bytes written but not yet acknowledged.
1811 tp->write_seq - tp->snd_una,
1814 jiffies_delta_to_clock_t(timer_expires - jiffies),
1815 icsk->icsk_retransmits,
1816 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1817 icsk->icsk_probes_out,
1819 atomic_read(&sp->sk_refcnt), sp,
1820 jiffies_to_clock_t(icsk->icsk_rto),
1821 jiffies_to_clock_t(icsk->icsk_ack.ato),
1822 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
// Last column: the fastopen max_qlen for a listener; otherwise
// snd_ssthresh, or -1 while still in initial slow start.
1824 state == TCP_LISTEN ?
1825 fastopenq->max_qlen :
1826 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
// get_timewait6_sock - format one listing line for a timewait sock.
// @seq: seq_file receiving the output
// @tw:  timewait sock to describe
// @i:   entry number passed in by the caller
// Called from tcp6_seq_show for socks in TCP_TIME_WAIT state.
1830 static void get_timewait6_sock(struct seq_file *seq,
1831 struct inet_timewait_sock *tw, int i)
// Jiffies remaining until the timewait timer expires.
1833 long delta = tw->tw_timer.expires - jiffies;
1834 const struct in6_addr *dest, *src;
1837 dest = &tw->tw_v6_daddr;
1838 src = &tw->tw_v6_rcv_saddr;
1839 destp = ntohs(tw->tw_dport);
1840 srcp = ntohs(tw->tw_sport);
1843 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1844 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1846 src->s6_addr32[0], src->s6_addr32[1],
1847 src->s6_addr32[2], src->s6_addr32[3], srcp,
1848 dest->s6_addr32[0], dest->s6_addr32[1],
1849 dest->s6_addr32[2], dest->s6_addr32[3], destp,
// The state column shows tw_substate; both queue columns are zero.
1850 tw->tw_substate, 0, 0,
// The timer column is the constant 3, followed by the remaining lifetime;
// the four columns after that are printed as zero.
1851 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1852 atomic_read(&tw->tw_refcnt), tw);
// tcp6_seq_show - seq_file show callback (the .show member of
// tcp6_seq_afinfo).
// @seq: seq_file receiving the output
// @v:   either SEQ_START_TOKEN or the sock to print
// Emits the column header for the start token; otherwise formats one entry
// with the helper matching the sock state. Returns an int status.
1855 static int tcp6_seq_show(struct seq_file *seq, void *v)
1857 struct tcp_iter_state *st;
1858 struct sock *sk = v;
1860 if (v == SEQ_START_TOKEN) {
1865 "st tx_queue rx_queue tr tm->when retrnsmt"
1866 " uid timeout inode\n");
// Choose the formatter from sk_state: timewait socks, request socks, and
// everything else. st->num is passed as the entry number.
1871 if (sk->sk_state == TCP_TIME_WAIT)
1872 get_timewait6_sock(seq, v, st->num);
1873 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1874 get_openreq6(seq, v, st->num);
1876 get_tcp6_sock(seq, v, st->num);
// File operations for the TCPv6 proc listing (referenced by the .seq_fops
// member of tcp6_seq_afinfo): open via tcp_seq_open, seek via seq_lseek,
// release via seq_release_net.
1881 static const struct file_operations tcp6_afinfo_seq_fops = {
1882 .owner = THIS_MODULE,
1883 .open = tcp_seq_open,
1885 .llseek = seq_lseek,
1886 .release = seq_release_net
// Descriptor handed to tcp_proc_register() and tcp_proc_unregister() by
// tcp6_proc_init and tcp6_proc_exit; it ties the file operations to the
// tcp6_seq_show callback.
1889 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1892 .seq_fops = &tcp6_afinfo_seq_fops,
1894 .show = tcp6_seq_show,
// tcp6_proc_init - register the TCPv6 proc listing for one namespace.
// @net: network namespace to register in
// Returns whatever tcp_proc_register() returns.
1898 int __net_init tcp6_proc_init(struct net *net)
1900 return tcp_proc_register(net, &tcp6_seq_afinfo);
// tcp6_proc_exit - remove the TCPv6 proc listing from one namespace.
// @net: network namespace to unregister from
// Counterpart of tcp6_proc_init; passes the same descriptor to
// tcp_proc_unregister().
1903 void tcp6_proc_exit(struct net *net)
1905 tcp_proc_unregister(net, &tcp6_seq_afinfo);
// Protocol descriptor for TCP over IPv6 (referenced by the .prot member of
// tcpv6_protosw). Most handlers are the generic tcp_* and inet_csk_*
// functions; the IPv6-specific entries are connect, init, destroy and
// backlog_rcv, plus the timewait and request sock ops. backlog_rcv is
// tcp_v6_do_rcv, the same function tcp_v6_rcv calls directly when the
// socket is not owned by a user context. The compat sockopt handlers
// exist only under CONFIG_COMPAT.
1909 struct proto tcpv6_prot = {
1911 .owner = THIS_MODULE,
1913 .connect = tcp_v6_connect,
1914 .disconnect = tcp_disconnect,
1915 .accept = inet_csk_accept,
1917 .init = tcp_v6_init_sock,
1918 .destroy = tcp_v6_destroy_sock,
1919 .shutdown = tcp_shutdown,
1920 .setsockopt = tcp_setsockopt,
1921 .getsockopt = tcp_getsockopt,
1922 .recvmsg = tcp_recvmsg,
1923 .sendmsg = tcp_sendmsg,
1924 .sendpage = tcp_sendpage,
1925 .backlog_rcv = tcp_v6_do_rcv,
1926 .release_cb = tcp_release_cb,
1928 .unhash = inet_unhash,
1929 .get_port = inet_csk_get_port,
1930 .enter_memory_pressure = tcp_enter_memory_pressure,
1931 .stream_memory_free = tcp_stream_memory_free,
1932 .sockets_allocated = &tcp_sockets_allocated,
1933 .memory_allocated = &tcp_memory_allocated,
1934 .memory_pressure = &tcp_memory_pressure,
1935 .orphan_count = &tcp_orphan_count,
1936 .sysctl_mem = sysctl_tcp_mem,
1937 .sysctl_wmem = sysctl_tcp_wmem,
1938 .sysctl_rmem = sysctl_tcp_rmem,
1939 .max_header = MAX_TCP_HEADER,
1940 .obj_size = sizeof(struct tcp6_sock),
1941 .slab_flags = SLAB_DESTROY_BY_RCU,
1942 .twsk_prot = &tcp6_timewait_sock_ops,
1943 .rsk_prot = &tcp6_request_sock_ops,
1944 .h.hashinfo = &tcp_hashinfo,
1945 .no_autobind = true,
1946 #ifdef CONFIG_COMPAT
1947 .compat_setsockopt = compat_tcp_setsockopt,
1948 .compat_getsockopt = compat_tcp_getsockopt,
1950 .diag_destroy = tcp_abort,
// inet6 protocol hooks for TCP; tcpv6_init registers this table for
// IPPROTO_TCP with inet6_add_protocol() and tcpv6_exit removes it.
// It wires up the early demux, receive and error handlers defined in this
// file.
1953 static const struct inet6_protocol tcpv6_protocol = {
1954 .early_demux = tcp_v6_early_demux,
1955 .handler = tcp_v6_rcv,
1956 .err_handler = tcp_v6_err,
1957 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
// Socket-switch entry binding SOCK_STREAM with IPPROTO_TCP to tcpv6_prot
// and the inet6 stream socket ops; registered by tcpv6_init with
// inet6_register_protosw().
1960 static struct inet_protosw tcpv6_protosw = {
1961 .type = SOCK_STREAM,
1962 .protocol = IPPROTO_TCP,
1963 .prot = &tcpv6_prot,
1964 .ops = &inet6_stream_ops,
1965 .flags = INET_PROTOSW_PERMANENT |
// tcpv6_net_init - per-namespace init (the .init member of tcpv6_net_ops).
// @net: network namespace being set up
// Creates a control socket (PF_INET6, SOCK_RAW, IPPROTO_TCP) and stores it
// in net->ipv6.tcp_sk. Returns whatever inet_ctl_sock_create() returns.
1969 static int __net_init tcpv6_net_init(struct net *net)
1971 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1972 SOCK_RAW, IPPROTO_TCP, net);
// tcpv6_net_exit - per-namespace teardown (the .exit member of
// tcpv6_net_ops).
// @net: network namespace being torn down
// Destroys the control socket created by tcpv6_net_init.
1975 static void __net_exit tcpv6_net_exit(struct net *net)
1977 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
// tcpv6_net_exit_batch - batched namespace teardown (the .exit_batch member
// of tcpv6_net_ops).
// @net_exit_list: list of exiting namespaces; not used in the visible body
// Purges AF_INET6 timewait socks from tcp_hashinfo.
1980 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1982 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
// Per-namespace operations for TCPv6; registered by tcpv6_init with
// register_pernet_subsys() and removed by tcpv6_exit.
1985 static struct pernet_operations tcpv6_net_ops = {
1986 .init = tcpv6_net_init,
1987 .exit = tcpv6_net_exit,
1988 .exit_batch = tcpv6_net_exit_batch,
// tcpv6_init - register TCP with the IPv6 stack.
// Registers, in order, the inet6 protocol hooks, the socket-switch entry
// and the per-namespace operations. The goto targets unwind the earlier
// steps in reverse order when a later step fails. Returns an int status
// held in ret.
1991 int __init tcpv6_init(void)
1995 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
1999 /* register inet6 protocol */
2000 ret = inet6_register_protosw(&tcpv6_protosw);
2002 goto out_tcpv6_protocol;
2004 ret = register_pernet_subsys(&tcpv6_net_ops);
2006 goto out_tcpv6_protosw;
// Error unwinding: undo the protosw registration, then the protocol hooks.
2011 inet6_unregister_protosw(&tcpv6_protosw);
2013 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
// tcpv6_exit - unregister TCP from the IPv6 stack.
// Undoes the three registrations made by tcpv6_init in the reverse of the
// order in which they were made.
2017 void tcpv6_exit(void)
2019 unregister_pernet_subsys(&tcpv6_net_ops);
2020 inet6_unregister_protosw(&tcpv6_protosw);
2021 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);