3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/inet_common.h>
63 #include <net/secure_seq.h>
64 #include <net/busy_poll.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 #include <crypto/hash.h>
70 #include <linux/scatterlist.h>
72 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
74 struct request_sock *req);
76 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78 static const struct inet_connection_sock_af_ops ipv6_mapped;
79 static const struct inet_connection_sock_af_ops ipv6_specific;
80 #ifdef CONFIG_TCP_MD5SIG
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
82 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
84 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
85 const struct in6_addr *addr)
/*
 * Cache the route attached to @skb on @sk for later receive processing.
 *
 * When the skb carries a dst and a reference to it can be taken
 * (dst_hold_safe()), the dst is published in sk->sk_rx_dst and the skb's
 * incoming interface index and the route's cookie are stored next to it.
 */
91 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
93 struct dst_entry *dst = skb_dst(skb);
95 if (dst && dst_hold_safe(dst)) {
96 const struct rt6_info *rt = (const struct rt6_info *)dst;
98 rcu_assign_pointer(sk->sk_rx_dst, dst);
99 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
100 inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
/*
 * Compute a sequence number for @skb with secure_tcpv6_sequence_number().
 *
 * The visible arguments are the packet's IPv6 destination and source
 * addresses and its TCP source port.
 * NOTE(review): at least one argument line is missing from this listing.
 */
104 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
106 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
107 ipv6_hdr(skb)->saddr.s6_addr32,
109 tcp_hdr(skb)->source);
/*
 * Start an active TCP connection from @sk to the IPv6 address in @uaddr.
 *
 * Steps visible in this listing, in order:
 *  - check the address length against SIN6_LEN_RFC2133 and the family
 *    against AF_INET6 (-EAFNOSUPPORT on a family mismatch);
 *  - take the flow label from sin6_flowinfo and look it up on the socket
 *    when a label is present;
 *  - replace an unspecified destination with loopback (v4-mapped loopback
 *    when the socket's receive address is v4-mapped);
 *  - test for multicast and link-local destinations; for link-local, a
 *    non-zero sin6_scope_id is compared with and stored in
 *    sk->sk_bound_dev_if, and a bound device is checked for;
 *  - clear the cached timestamp state when the destination differs from
 *    the one previously stored on the socket;
 *  - for a v4-mapped destination, switch the socket to the ipv6_mapped
 *    ops and call tcp_v4_connect(); the lines after the call put the
 *    ipv6_specific ops back (presumably only on failure; the guarding
 *    condition is not visible here);
 *  - otherwise fill in a flowi6, route it with ip6_dst_lookup_flow(),
 *    store the dst, set the extension-header length and mss_clamp, move
 *    to TCP_SYN_SENT, hash the socket with inet6_hash_connect(), choose
 *    write_seq when it is not already set and the socket is not in
 *    repair mode, and call tcp_connect().
 *
 * The final lines set the state to TCP_CLOSE and clear inet_dport and
 * sk_route_caps.
 * NOTE(review): presumably the failure path; the labels and return
 * statements are missing from this listing.
 */
112 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
115 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
116 struct inet_sock *inet = inet_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 struct ipv6_pinfo *np = inet6_sk(sk);
119 struct tcp_sock *tp = tcp_sk(sk);
120 struct in6_addr *saddr = NULL, *final_p, final;
121 struct ipv6_txoptions *opt;
123 struct dst_entry *dst;
127 if (addr_len < SIN6_LEN_RFC2133)
130 if (usin->sin6_family != AF_INET6)
131 return -EAFNOSUPPORT;
133 memset(&fl6, 0, sizeof(fl6));
136 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
137 IP6_ECN_flow_init(fl6.flowlabel);
138 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
139 struct ip6_flowlabel *flowlabel;
140 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
143 fl6_sock_release(flowlabel);
148 * connect() to INADDR_ANY means loopback (BSD'ism).
151 if (ipv6_addr_any(&usin->sin6_addr)) {
152 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
153 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
156 usin->sin6_addr = in6addr_loopback;
159 addr_type = ipv6_addr_type(&usin->sin6_addr);
161 if (addr_type & IPV6_ADDR_MULTICAST)
164 if (addr_type&IPV6_ADDR_LINKLOCAL) {
165 if (addr_len >= sizeof(struct sockaddr_in6) &&
166 usin->sin6_scope_id) {
167 /* If interface is set while binding, indices
170 if (sk->sk_bound_dev_if &&
171 sk->sk_bound_dev_if != usin->sin6_scope_id)
174 sk->sk_bound_dev_if = usin->sin6_scope_id;
177 /* Connect to link-local address requires an interface */
178 if (!sk->sk_bound_dev_if)
182 if (tp->rx_opt.ts_recent_stamp &&
183 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
184 tp->rx_opt.ts_recent = 0;
185 tp->rx_opt.ts_recent_stamp = 0;
189 sk->sk_v6_daddr = usin->sin6_addr;
190 np->flow_label = fl6.flowlabel;
196 if (addr_type & IPV6_ADDR_MAPPED) {
197 u32 exthdrlen = icsk->icsk_ext_hdr_len;
198 struct sockaddr_in sin;
200 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
202 if (__ipv6_only_sock(sk))
205 sin.sin_family = AF_INET;
206 sin.sin_port = usin->sin6_port;
207 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
209 icsk->icsk_af_ops = &ipv6_mapped;
210 sk->sk_backlog_rcv = tcp_v4_do_rcv;
211 #ifdef CONFIG_TCP_MD5SIG
212 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
215 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
218 icsk->icsk_ext_hdr_len = exthdrlen;
219 icsk->icsk_af_ops = &ipv6_specific;
220 sk->sk_backlog_rcv = tcp_v6_do_rcv;
221 #ifdef CONFIG_TCP_MD5SIG
222 tp->af_specific = &tcp_sock_ipv6_specific;
226 np->saddr = sk->sk_v6_rcv_saddr;
231 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
232 saddr = &sk->sk_v6_rcv_saddr;
234 fl6.flowi6_proto = IPPROTO_TCP;
235 fl6.daddr = sk->sk_v6_daddr;
236 fl6.saddr = saddr ? *saddr : np->saddr;
237 fl6.flowi6_oif = sk->sk_bound_dev_if;
238 fl6.flowi6_mark = sk->sk_mark;
239 fl6.fl6_dport = usin->sin6_port;
240 fl6.fl6_sport = inet->inet_sport;
242 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
243 final_p = fl6_update_dst(&fl6, opt, &final);
245 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
247 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
255 sk->sk_v6_rcv_saddr = *saddr;
258 /* set the source address */
260 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
262 sk->sk_gso_type = SKB_GSO_TCPV6;
263 ip6_dst_store(sk, dst, NULL, NULL);
265 if (tcp_death_row.sysctl_tw_recycle &&
266 !tp->rx_opt.ts_recent_stamp &&
267 ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr))
268 tcp_fetch_timewait_stamp(sk, dst);
270 icsk->icsk_ext_hdr_len = 0;
272 icsk->icsk_ext_hdr_len = opt->opt_flen +
275 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
277 inet->inet_dport = usin->sin6_port;
279 tcp_set_state(sk, TCP_SYN_SENT);
280 err = inet6_hash_connect(&tcp_death_row, sk);
286 if (!tp->write_seq && likely(!tp->repair))
287 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
288 sk->sk_v6_daddr.s6_addr32,
292 err = tcp_connect(sk);
299 tcp_set_state(sk, TCP_CLOSE);
302 inet->inet_dport = 0;
303 sk->sk_route_caps = 0;
/*
 * React to a reduced path MTU previously stored in tcp_sk(sk)->mtu_info.
 *
 * The socket state is tested against LISTEN and CLOSE, the stored MTU is
 * read with READ_ONCE(), and the MSS it implies is compared with the
 * current mss_cache. The route is then updated through
 * inet6_csk_update_pmtu(); if the cached icsk_pmtu_cookie is larger than the
 * new dst MTU, the MSS is resynchronised and tcp_simple_retransmit() is
 * called.
 * NOTE(review): the early-return statements are missing from this listing.
 */
307 static void tcp_v6_mtu_reduced(struct sock *sk)
309 struct dst_entry *dst;
312 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
315 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
317 /* Drop requests trying to increase our current mss.
318 * Check done in __ip6_rt_update_pmtu() is too late.
320 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
323 dst = inet6_csk_update_pmtu(sk, mtu);
327 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
328 tcp_sync_mss(sk, dst_mtu(dst));
329 tcp_simple_retransmit(sk);
333 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
334 u8 type, u8 code, int offset, __be32 info)
336 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
337 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
338 struct net *net = dev_net(skb->dev);
339 struct request_sock *fastopen;
340 struct ipv6_pinfo *np;
347 sk = __inet6_lookup_established(net, &tcp_hashinfo,
348 &hdr->daddr, th->dest,
349 &hdr->saddr, ntohs(th->source),
353 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
358 if (sk->sk_state == TCP_TIME_WAIT) {
359 inet_twsk_put(inet_twsk(sk));
362 seq = ntohl(th->seq);
363 fatal = icmpv6_err_convert(type, code, &err);
364 if (sk->sk_state == TCP_NEW_SYN_RECV)
365 return tcp_req_err(sk, seq, fatal);
368 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
369 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
371 if (sk->sk_state == TCP_CLOSE)
374 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
375 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
380 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
381 fastopen = tp->fastopen_rsk;
382 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
383 if (sk->sk_state != TCP_LISTEN &&
384 !between(seq, snd_una, tp->snd_nxt)) {
385 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
391 if (type == NDISC_REDIRECT) {
392 if (!sock_owned_by_user(sk)) {
393 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
396 dst->ops->redirect(dst, sk, skb);
401 if (type == ICMPV6_PKT_TOOBIG) {
402 u32 mtu = ntohl(info);
404 /* We are not interested in TCP_LISTEN and open_requests
405 * (SYN-ACKs sent out by Linux are always <576bytes so
406 * they should go through unfragmented).
408 if (sk->sk_state == TCP_LISTEN)
411 if (!ip6_sk_accept_pmtu(sk))
414 if (mtu < IPV6_MIN_MTU)
417 WRITE_ONCE(tp->mtu_info, mtu);
419 if (!sock_owned_by_user(sk))
420 tcp_v6_mtu_reduced(sk);
421 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
428 /* Might be for a request_sock */
429 switch (sk->sk_state) {
432 /* Only in fast or simultaneous open. If a fast open socket is
433 * already accepted it is treated as a connected one below.
435 if (fastopen && !fastopen->sk)
438 if (!sock_owned_by_user(sk)) {
440 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
444 sk->sk_err_soft = err;
448 if (!sock_owned_by_user(sk) && np->recverr) {
450 sk->sk_error_report(sk);
452 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for the connection request @req.
 *
 * A route is obtained with inet6_csk_route_req() when the caller passes no
 * @dst. The segment is built by tcp_make_synack() and checksummed against the
 * request's local and remote IPv6 addresses. If the listener has repflow set
 * and the request saved its packet options, the flow label of that saved
 * packet is reused. The segment is sent with ip6_xmit() and the result is
 * passed through net_xmit_eval().
 *
 * The IPv6 tx options come from ireq->ipv6_opt or from np->opt.
 * NOTE(review): the condition choosing between them is missing from this
 * listing.
 */
460 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
462 struct request_sock *req,
463 struct tcp_fastopen_cookie *foc,
464 enum tcp_synack_type synack_type)
466 struct inet_request_sock *ireq = inet_rsk(req);
467 struct ipv6_pinfo *np = inet6_sk(sk);
468 struct ipv6_txoptions *opt;
469 struct flowi6 *fl6 = &fl->u.ip6;
473 /* First, grab a route. */
474 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
475 IPPROTO_TCP)) == NULL)
478 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
481 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
482 &ireq->ir_v6_rmt_addr);
484 fl6->daddr = ireq->ir_v6_rmt_addr;
485 if (np->repflow && ireq->pktopts)
486 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
489 opt = ireq->ipv6_opt;
491 opt = rcu_dereference(np->opt);
492 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
494 err = net_xmit_eval(err);
/*
 * Release the IPv6-specific resources held by a request sock: the saved
 * IPv6 tx options (kfree) and the saved packet-options skb (kfree_skb).
 */
502 static void tcp_v6_reqsk_destructor(struct request_sock *req)
504 kfree(inet_rsk(req)->ipv6_opt);
505 kfree_skb(inet_rsk(req)->pktopts);
508 #ifdef CONFIG_TCP_MD5SIG
/*
 * Look up the MD5 signature key on @sk for the IPv6 address @addr.
 * Thin wrapper around tcp_md5_do_lookup() with the family fixed to AF_INET6.
 */
509 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
510 const struct in6_addr *addr)
512 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/*
 * Look up the MD5 signature key on @sk that matches the destination address
 * (sk_v6_daddr) of @addr_sk.
 */
515 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
516 const struct sock *addr_sk)
518 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/*
 * Add or delete an MD5 signature key described by the user buffer @optval.
 *
 * A struct tcp_md5sig is copied in from user space; the buffer must be at
 * least that large and the embedded address must be AF_INET6. A zero
 * tcpm_keylen takes the delete path (tcp_md5_do_del()); otherwise the key
 * length is checked against TCP_MD5SIG_MAXKEYLEN and the key is added with
 * tcp_md5_do_add(). For a v4-mapped address the key is keyed on the last
 * 32 bits of the address (the add path passes AF_INET for it).
 * NOTE(review): the error return statements are missing from this listing.
 */
521 static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
524 struct tcp_md5sig cmd;
525 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
527 if (optlen < sizeof(cmd))
530 if (copy_from_user(&cmd, optval, sizeof(cmd)))
533 if (sin6->sin6_family != AF_INET6)
536 if (!cmd.tcpm_keylen) {
537 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
538 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
540 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
544 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
547 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
548 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
549 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
551 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
552 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/*
 * Feed the IPv6 pseudo-header and a copy of the TCP header @th into the hash
 * request held by @hp.
 *
 * @nbytes is stored (big-endian) in the pseudo-header length field and
 * IPPROTO_TCP in its protocol field. The TCP header copy is placed directly
 * after the pseudo-header so both are hashed as one scatterlist entry.
 * Returns the result of crypto_ahash_update().
 * NOTE(review): the lines that set up bp and fill its address fields are
 * missing from this listing.
 */
555 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
556 const struct in6_addr *daddr,
557 const struct in6_addr *saddr,
558 const struct tcphdr *th, int nbytes)
560 struct tcp6_pseudohdr *bp;
561 struct scatterlist sg;
565 /* 1. TCP pseudo-header (RFC2460) */
568 bp->protocol = cpu_to_be32(IPPROTO_TCP);
569 bp->len = cpu_to_be32(nbytes);
571 _th = (struct tcphdr *)(bp + 1);
572 memcpy(_th, th, sizeof(*th));
575 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
576 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
577 sizeof(*bp) + sizeof(*th));
578 return crypto_ahash_update(hp->md5_req);
/*
 * Compute the MD5 signature of a header-only segment into @md5_hash.
 *
 * Hashes the pseudo-header and TCP header (segment length th->doff << 2)
 * followed by @key, using a request from the MD5 signature pool, and writes
 * the digest to @md5_hash. The trailing lines release the pool and zero the
 * 16-byte output.
 * NOTE(review): presumably the failure path; the labels and return
 * statements are missing from this listing.
 */
581 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
582 const struct in6_addr *daddr, struct in6_addr *saddr,
583 const struct tcphdr *th)
585 struct tcp_md5sig_pool *hp;
586 struct ahash_request *req;
588 hp = tcp_get_md5sig_pool();
590 goto clear_hash_noput;
593 if (crypto_ahash_init(req))
595 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
597 if (tcp_md5_hash_key(hp, key))
599 ahash_request_set_crypt(req, NULL, md5_hash, 0);
600 if (crypto_ahash_final(req))
603 tcp_put_md5sig_pool();
607 tcp_put_md5sig_pool();
609 memset(md5_hash, 0, 16);
/*
 * Compute the MD5 signature of a full segment (@skb) into @md5_hash.
 *
 * The addresses for the pseudo-header come from @sk when it is non-NULL,
 * otherwise from the packet's own IPv6 header. The hash covers the
 * pseudo-header and TCP header (with skb->len as the length), the payload
 * starting after the TCP header (th->doff << 2), and finally @key. The
 * trailing lines release the pool and zero the 16-byte output.
 * NOTE(review): presumably the failure path; the labels and return
 * statements are missing from this listing.
 */
613 static int tcp_v6_md5_hash_skb(char *md5_hash,
614 const struct tcp_md5sig_key *key,
615 const struct sock *sk,
616 const struct sk_buff *skb)
618 const struct in6_addr *saddr, *daddr;
619 struct tcp_md5sig_pool *hp;
620 struct ahash_request *req;
621 const struct tcphdr *th = tcp_hdr(skb);
623 if (sk) { /* valid for establish/request sockets */
624 saddr = &sk->sk_v6_rcv_saddr;
625 daddr = &sk->sk_v6_daddr;
627 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
628 saddr = &ip6h->saddr;
629 daddr = &ip6h->daddr;
632 hp = tcp_get_md5sig_pool();
634 goto clear_hash_noput;
637 if (crypto_ahash_init(req))
640 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
642 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
644 if (tcp_md5_hash_key(hp, key))
646 ahash_request_set_crypt(req, NULL, md5_hash, 0);
647 if (crypto_ahash_final(req))
650 tcp_put_md5sig_pool();
654 tcp_put_md5sig_pool();
656 memset(md5_hash, 0, 16);
/*
 * Check the TCP MD5 signature option of an incoming segment against the key
 * configured on @sk for the segment's source address.
 *
 * Cases handled in the visible code:
 *  - no key and no option: nothing to verify;
 *  - key configured but option absent: counts LINUX_MIB_TCPMD5NOTFOUND;
 *  - option present but no key: counts LINUX_MIB_TCPMD5UNEXPECTED;
 *  - both present: the hash is recomputed over the skb and compared with the
 *    16 bytes in the option; a computation failure or mismatch counts
 *    LINUX_MIB_TCPMD5FAILURE and logs a rate-limited message.
 * NOTE(review): the return statements are missing from this listing, so the
 * meaning of the bool result cannot be confirmed here.
 */
662 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
663 const struct sk_buff *skb)
665 #ifdef CONFIG_TCP_MD5SIG
666 const __u8 *hash_location = NULL;
667 struct tcp_md5sig_key *hash_expected;
668 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
669 const struct tcphdr *th = tcp_hdr(skb);
673 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
674 hash_location = tcp_parse_md5sig_option(th);
676 /* We've parsed the options - do we have a hash? */
677 if (!hash_expected && !hash_location)
680 if (hash_expected && !hash_location) {
681 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
685 if (!hash_expected && hash_location) {
686 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
690 /* check the signature */
691 genhash = tcp_v6_md5_hash_skb(newhash,
695 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
696 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
697 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
698 genhash ? "failed" : "mismatch",
699 &ip6h->saddr, ntohs(th->source),
700 &ip6h->daddr, ntohs(th->dest));
/*
 * Fill in the IPv6 part of a new request sock from the incoming packet.
 *
 * The request's remote and local addresses are taken from the packet's
 * source and destination. When the listener is not bound to a device and the
 * peer is link-local, the incoming interface is recorded in ir_iif. If this
 * is not a time-wait recycled SYN (tcp_tw_isn == 0) and the listener has any
 * of the listed receive options (or repflow) enabled, an extra reference is
 * taken on the skb.
 * NOTE(review): presumably so the skb can be kept as the request's saved
 * packet options; that assignment is missing from this listing.
 */
707 static void tcp_v6_init_req(struct request_sock *req,
708 const struct sock *sk_listener,
711 struct inet_request_sock *ireq = inet_rsk(req);
712 const struct ipv6_pinfo *np = inet6_sk(sk_listener);
714 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
715 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
717 /* So that link locals have meaning */
718 if (!sk_listener->sk_bound_dev_if &&
719 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
720 ireq->ir_iif = tcp_v6_iif(skb);
722 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
723 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
724 np->rxopt.bits.rxinfo ||
725 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
726 np->rxopt.bits.rxohlim || np->repflow)) {
727 atomic_inc(&skb->users);
/*
 * Route lookup for a connection request: returns the dst produced by
 * inet6_csk_route_req() for @req with protocol IPPROTO_TCP.
 * NOTE(review): some parameter lines and any statements before the return
 * are missing from this listing.
 */
732 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
734 const struct request_sock *req,
739 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
/*
 * Request-sock operations for TCP over IPv6: object size of a
 * tcp6_request_sock plus the callbacks for SYN-ACK retransmit, ACK and reset
 * transmission, destruction, and SYN-ACK timeout.
 */
742 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
744 .obj_size = sizeof(struct tcp6_request_sock),
745 .rtx_syn_ack = tcp_rtx_synack,
746 .send_ack = tcp_v6_reqsk_send_ack,
747 .destructor = tcp_v6_reqsk_destructor,
748 .send_reset = tcp_v6_send_reset,
749 .syn_ack_timeout = tcp_syn_ack_timeout,
/*
 * IPv6 hooks used while handling an incoming connection request.
 * mss_clamp is the minimum IPv6 MTU less the TCP and IPv6 header sizes.
 * The MD5 and SYN-cookie hooks are present only when the corresponding
 * config options are enabled.
 */
752 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
753 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
754 sizeof(struct ipv6hdr),
755 #ifdef CONFIG_TCP_MD5SIG
756 .req_md5_lookup = tcp_v6_md5_lookup,
757 .calc_md5_hash = tcp_v6_md5_hash_skb,
759 .init_req = tcp_v6_init_req,
760 #ifdef CONFIG_SYN_COOKIES
761 .cookie_init_seq = cookie_v6_init_sequence,
763 .route_req = tcp_v6_route_req,
764 .init_seq = tcp_v6_init_sequence,
765 .send_synack = tcp_v6_send_synack,
/*
 * Build and send a data-less TCP control segment in reply to @skb.
 *
 * Shared by the reset and ACK senders in this file: tcp_v6_send_reset()
 * passes @rst = 1 and tcp_v6_send_ack() passes 0. The reply swaps the ports
 * and IPv6 addresses of @skb, carries @seq, @ack and @win, and may include
 * a timestamp option (@tsval/@tsecr) and an MD5 signature option (@key).
 * The ACK flag is set unless this is a reset answering a segment that had
 * ACK set. The flow uses @label, @tclass and either @oif or the incoming
 * interface, and the segment is sent through the per-namespace control
 * socket net->ipv6.tcp_sk. TCP_MIB_OUTSEGS and TCP_MIB_OUTRSTS are updated.
 * NOTE(review): the conditions guarding the option, statistics and error
 * paths are missing from this listing.
 */
768 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
769 u32 ack, u32 win, u32 tsval, u32 tsecr,
770 int oif, struct tcp_md5sig_key *key, int rst,
771 u8 tclass, __be32 label)
773 const struct tcphdr *th = tcp_hdr(skb);
775 struct sk_buff *buff;
777 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
778 struct sock *ctl_sk = net->ipv6.tcp_sk;
779 unsigned int tot_len = sizeof(struct tcphdr);
780 struct dst_entry *dst;
784 tot_len += TCPOLEN_TSTAMP_ALIGNED;
785 #ifdef CONFIG_TCP_MD5SIG
787 tot_len += TCPOLEN_MD5SIG_ALIGNED;
790 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
795 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
797 t1 = (struct tcphdr *) skb_push(buff, tot_len);
798 skb_reset_transport_header(buff);
800 /* Swap the send and the receive. */
801 memset(t1, 0, sizeof(*t1));
802 t1->dest = th->source;
803 t1->source = th->dest;
804 t1->doff = tot_len / 4;
805 t1->seq = htonl(seq);
806 t1->ack_seq = htonl(ack);
807 t1->ack = !rst || !th->ack;
809 t1->window = htons(win);
811 topt = (__be32 *)(t1 + 1);
814 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
815 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
816 *topt++ = htonl(tsval);
817 *topt++ = htonl(tsecr);
820 #ifdef CONFIG_TCP_MD5SIG
822 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
823 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
824 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
825 &ipv6_hdr(skb)->saddr,
826 &ipv6_hdr(skb)->daddr, t1);
830 memset(&fl6, 0, sizeof(fl6));
831 fl6.daddr = ipv6_hdr(skb)->saddr;
832 fl6.saddr = ipv6_hdr(skb)->daddr;
833 fl6.flowlabel = label;
835 buff->ip_summed = CHECKSUM_PARTIAL;
838 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
840 fl6.flowi6_proto = IPPROTO_TCP;
841 if (rt6_need_strict(&fl6.daddr) && !oif)
842 fl6.flowi6_oif = tcp_v6_iif(skb);
844 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
847 fl6.flowi6_oif = oif;
850 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
851 fl6.fl6_dport = t1->dest;
852 fl6.fl6_sport = t1->source;
853 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
855 /* Pass a socket to ip6_dst_lookup either it is for RST
856 * Underlying function will use this to retrieve the network
859 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
861 skb_dst_set(buff, dst);
862 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
863 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
865 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
872 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
874 const struct tcphdr *th = tcp_hdr(skb);
875 u32 seq = 0, ack_seq = 0;
876 struct tcp_md5sig_key *key = NULL;
877 #ifdef CONFIG_TCP_MD5SIG
878 const __u8 *hash_location = NULL;
879 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
880 unsigned char newhash[16];
882 struct sock *sk1 = NULL;
889 /* If sk not NULL, it means we did a successful lookup and incoming
890 * route had to be correct. prequeue might have dropped our dst.
892 if (!sk && !ipv6_unicast_destination(skb))
895 #ifdef CONFIG_TCP_MD5SIG
897 hash_location = tcp_parse_md5sig_option(th);
898 if (sk && sk_fullsock(sk)) {
899 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
900 } else if (hash_location) {
902 * active side is lost. Try to find listening socket through
903 * source port, and then find md5 key through listening socket.
904 * We do not lose security here:
905 * Incoming packet is checked with md5 hash with finding key,
906 * no RST generated if md5 hash doesn't match.
908 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
909 &tcp_hashinfo, NULL, 0,
911 th->source, &ipv6h->daddr,
912 ntohs(th->source), tcp_v6_iif(skb));
916 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
920 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
921 if (genhash || memcmp(hash_location, newhash, 16) != 0)
927 seq = ntohl(th->ack_seq);
929 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
932 oif = sk ? sk->sk_bound_dev_if : 0;
933 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
935 #ifdef CONFIG_TCP_MD5SIG
941 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
942 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
943 struct tcp_md5sig_key *key, u8 tclass,
946 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
950 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
952 struct inet_timewait_sock *tw = inet_twsk(sk);
953 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
955 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
956 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
957 tcp_time_stamp + tcptw->tw_ts_offset,
958 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
959 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
964 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
965 struct request_sock *req)
967 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
968 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
971 * The window field (SEG.WND) of every outgoing segment, with the
972 * exception of <SYN> segments, MUST be right-shifted by
973 * Rcv.Wind.Shift bits:
975 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
976 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
977 tcp_rsk(req)->rcv_nxt,
978 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
979 tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
980 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
985 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
987 #ifdef CONFIG_SYN_COOKIES
988 const struct tcphdr *th = tcp_hdr(skb);
991 sk = cookie_v6_check(sk, skb);
996 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
998 if (skb->protocol == htons(ETH_P_IP))
999 return tcp_v4_conn_request(sk, skb);
1001 if (!ipv6_unicast_destination(skb))
1004 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1005 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1009 return tcp_conn_request(&tcp6_request_sock_ops,
1010 &tcp_request_sock_ipv6_ops, sk, skb);
1014 return 0; /* don't send reset */
/*
 * Copy the inet6_skb_parm saved in TCP_SKB_CB(skb)->header.h6 back to the
 * IP6CB(skb) location. memmove() is used for the copy.
 */
1017 static void tcp_v6_restore_cb(struct sk_buff *skb)
1019 /* We need to move header back to the beginning if xfrm6_policy_check()
1020 * and tcp_v6_fill_cb() are going to be called again.
1021 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1023 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1024 sizeof(struct inet6_skb_parm));
1027 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1028 struct request_sock *req,
1029 struct dst_entry *dst,
1030 struct request_sock *req_unhash,
1033 struct inet_request_sock *ireq;
1034 struct ipv6_pinfo *newnp;
1035 const struct ipv6_pinfo *np = inet6_sk(sk);
1036 struct ipv6_txoptions *opt;
1037 struct tcp6_sock *newtcp6sk;
1038 struct inet_sock *newinet;
1039 struct tcp_sock *newtp;
1041 #ifdef CONFIG_TCP_MD5SIG
1042 struct tcp_md5sig_key *key;
1046 if (skb->protocol == htons(ETH_P_IP)) {
1051 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1052 req_unhash, own_req);
1057 newtcp6sk = (struct tcp6_sock *)newsk;
1058 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1060 newinet = inet_sk(newsk);
1061 newnp = inet6_sk(newsk);
1062 newtp = tcp_sk(newsk);
1064 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1066 newnp->saddr = newsk->sk_v6_rcv_saddr;
1068 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1069 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1070 #ifdef CONFIG_TCP_MD5SIG
1071 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1074 newnp->ipv6_mc_list = NULL;
1075 newnp->ipv6_ac_list = NULL;
1076 newnp->ipv6_fl_list = NULL;
1077 newnp->pktoptions = NULL;
1079 newnp->mcast_oif = inet_iif(skb);
1080 newnp->mcast_hops = ip_hdr(skb)->ttl;
1081 newnp->rcv_flowinfo = 0;
1083 newnp->flow_label = 0;
1086 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1087 * here, tcp_create_openreq_child now does this for us, see the comment in
1088 * that function for the gory details. -acme
1091 /* It is tricky place. Until this moment IPv4 tcp
1092 worked with IPv6 icsk.icsk_af_ops.
1095 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1100 ireq = inet_rsk(req);
1102 if (sk_acceptq_is_full(sk))
1106 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1111 newsk = tcp_create_openreq_child(sk, req, skb);
1116 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1117 * count here, tcp_create_openreq_child now does this for us, see the
1118 * comment in that function for the gory details. -acme
1121 newsk->sk_gso_type = SKB_GSO_TCPV6;
1122 ip6_dst_store(newsk, dst, NULL, NULL);
1123 inet6_sk_rx_dst_set(newsk, skb);
1125 newtcp6sk = (struct tcp6_sock *)newsk;
1126 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1128 newtp = tcp_sk(newsk);
1129 newinet = inet_sk(newsk);
1130 newnp = inet6_sk(newsk);
1132 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1134 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1135 newnp->saddr = ireq->ir_v6_loc_addr;
1136 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1137 newsk->sk_bound_dev_if = ireq->ir_iif;
1139 /* Now IPv6 options...
1141 First: no IPv4 options.
1143 newinet->inet_opt = NULL;
1144 newnp->ipv6_mc_list = NULL;
1145 newnp->ipv6_ac_list = NULL;
1146 newnp->ipv6_fl_list = NULL;
1149 newnp->rxopt.all = np->rxopt.all;
1151 newnp->pktoptions = NULL;
1153 newnp->mcast_oif = tcp_v6_iif(skb);
1154 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1155 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1157 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1159 /* Clone native IPv6 options from listening socket (if any)
1161 Yes, keeping reference count would be much more clever,
1162 but we do one more thing there: reattach optmem
1165 opt = ireq->ipv6_opt;
1167 opt = rcu_dereference(np->opt);
1169 opt = ipv6_dup_options(newsk, opt);
1170 RCU_INIT_POINTER(newnp->opt, opt);
1172 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1174 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1177 tcp_ca_openreq_child(newsk, dst);
1179 tcp_sync_mss(newsk, dst_mtu(dst));
1180 newtp->advmss = dst_metric_advmss(dst);
1181 if (tcp_sk(sk)->rx_opt.user_mss &&
1182 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1183 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1185 tcp_initialize_rcv_mss(newsk);
1187 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1188 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1190 #ifdef CONFIG_TCP_MD5SIG
1191 /* Copy over the MD5 key from the original socket */
1192 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1194 /* We're using one, so create a matching key
1195 * on the newsk structure. If we fail to get
1196 * memory, then we end up not copying the key
1199 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1200 AF_INET6, key->key, key->keylen,
1201 sk_gfp_mask(sk, GFP_ATOMIC));
1205 if (__inet_inherit_port(sk, newsk) < 0) {
1206 inet_csk_prepare_forced_close(newsk);
1210 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1212 tcp_move_syn(newtp, req);
1214 /* Clone pktoptions received with SYN, if we own the req */
1215 if (ireq->pktopts) {
1216 newnp->pktoptions = skb_clone(ireq->pktopts,
1217 sk_gfp_mask(sk, GFP_ATOMIC));
1218 consume_skb(ireq->pktopts);
1219 ireq->pktopts = NULL;
1220 if (newnp->pktoptions) {
1221 tcp_v6_restore_cb(newnp->pktoptions);
1222 skb_set_owner_r(newnp->pktoptions, newsk);
1230 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1238 /* The socket must have its spinlock held when we get
1239 * here, unless it is a TCP_LISTEN socket.
1241 * We have a potential double-lock case here, so even when
1242 * doing backlog processing we use the BH locking scheme.
1243 * This is because we cannot sleep with the original spinlock
1246 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1248 struct ipv6_pinfo *np = inet6_sk(sk);
1249 struct tcp_sock *tp;
1250 struct sk_buff *opt_skb = NULL;
1252 /* Imagine: socket is IPv6. IPv4 packet arrives,
1253 goes to IPv4 receive handler and backlogged.
1254 From backlog it always goes here. Kerboom...
1255 Fortunately, tcp_rcv_established and rcv_established
1256 handle them correctly, but it is not case with
1257 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1260 if (skb->protocol == htons(ETH_P_IP))
1261 return tcp_v4_do_rcv(sk, skb);
1263 if (tcp_filter(sk, skb))
1267 * socket locking is here for SMP purposes as backlog rcv
1268 * is currently called with bh processing disabled.
1271 /* Do Stevens' IPV6_PKTOPTIONS.
1273 Yes, guys, it is the only place in our code, where we
1274 may make it not affecting IPv4.
1275 The rest of code is protocol independent,
1276 and I do not like idea to uglify IPv4.
1278 Actually, all the idea behind IPV6_PKTOPTIONS
1279 looks not very well thought. For now we latch
1280 options, received in the last packet, enqueued
1281 by tcp. Feel free to propose better solution.
1285 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1287 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1288 struct dst_entry *dst;
1290 dst = rcu_dereference_protected(sk->sk_rx_dst,
1291 lockdep_sock_is_held(sk));
1293 sock_rps_save_rxhash(sk, skb);
1294 sk_mark_napi_id(sk, skb);
1296 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1297 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1298 RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
1303 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1305 goto ipv6_pktoptions;
1309 if (tcp_checksum_complete(skb))
1312 if (sk->sk_state == TCP_LISTEN) {
1313 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1319 sock_rps_save_rxhash(nsk, skb);
1320 sk_mark_napi_id(nsk, skb);
1321 if (tcp_child_process(sk, nsk, skb))
1324 __kfree_skb(opt_skb);
1328 sock_rps_save_rxhash(sk, skb);
1330 if (tcp_rcv_state_process(sk, skb))
1333 goto ipv6_pktoptions;
1337 tcp_v6_send_reset(sk, skb);
1340 __kfree_skb(opt_skb);
1344 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1345 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1350 /* Do you ask, what is it?
1352 1. skb was enqueued by tcp.
1353 2. skb is added to tail of read queue, rather than out of order.
1354 3. socket is not in passive state.
1355 4. Finally, it really contains options, which user wants to receive.
1358 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1359 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1360 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1361 np->mcast_oif = tcp_v6_iif(opt_skb);
1362 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1363 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1364 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1365 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1367 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1368 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1369 skb_set_owner_r(opt_skb, sk);
1370 tcp_v6_restore_cb(opt_skb);
1371 opt_skb = xchg(&np->pktoptions, opt_skb);
1373 __kfree_skb(opt_skb);
1374 opt_skb = xchg(&np->pktoptions, NULL);
/* tcp_v6_fill_cb - prepare TCP_SKB_CB(skb) for an incoming IPv6 segment.
 *
 * @skb: segment whose control block is being filled
 * @hdr: IPv6 header of the segment; supplies the DS field
 * @th:  TCP header; supplies seq, ack_seq, the SYN and FIN bits, doff and
 *       the flag byte
 *
 * The IPv6 control block (IP6CB) is first moved into
 * TCP_SKB_CB(skb)->header.h6 with memmove().  seq and ack_seq are then
 * converted with ntohl().  end_seq is seq plus one for each of SYN and FIN
 * plus the payload length (skb->len minus the th->doff * 4 header bytes).
 * tcp_tw_isn and sacked are cleared.
 *
 * NOTE(review): the comment in the body mentions barrier(), but no
 * barrier() call is visible in this listing (the embedded numbering skips
 * 1392-1393); confirm against the upstream file.
 */
1382 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1383 const struct tcphdr *th)
1385 /* This is tricky: we move IP6CB at its correct location into
1386 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1387 * _decode_session6() uses IP6CB().
1388 * barrier() makes sure compiler won't play aliasing games.
1390 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1391 sizeof(struct inet6_skb_parm));
1394 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1395 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1396 skb->len - th->doff*4);
1397 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1398 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1399 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1400 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1401 TCP_SKB_CB(skb)->sacked = 0;
/* tcp_v6_rcv - receive handler for TCP segments carried over IPv6
 * (installed as tcpv6_protocol.handler).
 *
 * @skb: incoming segment; skb->data is read as the TCP header.
 *
 * Visible flow, in order:
 *  1. Packets whose pkt_type is not PACKET_HOST take an early exit;
 *     TCP_MIB_INSEGS is then counted before any validation.
 *  2. The fixed TCP header is pulled, th->doff (in 32-bit words) is checked
 *     against sizeof(struct tcphdr), the full header is pulled and the
 *     checksum is initialised with ip6_compute_pseudo.  th and hdr are
 *     re-read afterwards (presumably because the pulls can move skb->data;
 *     confirm).
 *  3. The socket is looked up with __inet6_lookup_skb() using inet6_iif().
 *  4. TCP_NEW_SYN_RECV: processing continues on req->rsk_listener.  The
 *     control block is filled, the MD5 signature and the checksum are
 *     verified, a request whose listener has left TCP_LISTEN is dropped
 *     with inet_csk_reqsk_queue_drop_and_put(), and tcp_check_req()
 *     supplies the socket that tcp_child_process() is run against (a reset
 *     is sent on that socket if it returns non-zero).
 *  5. Otherwise the segment has to pass the min_hopcount test (counted as
 *     LINUX_MIB_TCPMINTTLDROP on failure), xfrm6_policy_check(), the MD5
 *     check and tcp_filter(); th and hdr are re-read after the filter.
 *  6. TCP_LISTEN sockets call tcp_v6_do_rcv() directly and jump to
 *     put_and_return.  For other sockets bh_lock_sock_nested() is taken;
 *     when the socket is not owned by user context tcp_prequeue() is tried
 *     first and tcp_v6_do_rcv() runs if it returns zero, otherwise the
 *     segment is handed to tcp_add_backlog().
 *  7. Two later sections call xfrm6_policy_check() with a NULL socket.  The
 *     first (presumably the no-socket path) fills the control block and
 *     either bumps TCP_MIB_CSUMERRORS / TCP_MIB_INERRS or sends a reset with
 *     a NULL socket.  The second works on a time-wait socket
 *     (inet_twsk(sk)): tcp_timewait_state_process() selects between a
 *     listener lookup via inet6_lookup_listener(), tcp_v6_timewait_ack(),
 *     a reset followed by inet_twsk_deschedule_put(), and TCP_TW_SUCCESS.
 *
 * Return: -1 if ret is non-zero, otherwise 0, on the visible return path.
 *
 * NOTE(review): the embedded numbering skips many lines in this function
 * (labels, goto targets, local declarations, braces), so several branch
 * targets described above are inferred from neighbouring lines only;
 * confirm against the upstream file before relying on them.
 */
1404 static int tcp_v6_rcv(struct sk_buff *skb)
1406 const struct tcphdr *th;
1407 const struct ipv6hdr *hdr;
1411 struct net *net = dev_net(skb->dev);
1413 if (skb->pkt_type != PACKET_HOST)
1417 * Count it even if it's bad.
1419 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1421 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1424 th = (const struct tcphdr *)skb->data;
1426 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1428 if (!pskb_may_pull(skb, th->doff*4))
1431 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1434 th = (const struct tcphdr *)skb->data;
1435 hdr = ipv6_hdr(skb);
1438 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1439 th->source, th->dest, inet6_iif(skb),
1445 if (sk->sk_state == TCP_TIME_WAIT)
1448 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1449 struct request_sock *req = inet_reqsk(sk);
1452 sk = req->rsk_listener;
1453 tcp_v6_fill_cb(skb, hdr, th);
1454 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1455 sk_drops_add(sk, skb);
1459 if (tcp_checksum_complete(skb)) {
1463 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1464 inet_csk_reqsk_queue_drop_and_put(sk, req);
1469 nsk = tcp_check_req(sk, skb, req, false);
1472 goto discard_and_relse;
1476 tcp_v6_restore_cb(skb);
1477 } else if (tcp_child_process(sk, nsk, skb)) {
1478 tcp_v6_send_reset(nsk, skb);
1479 goto discard_and_relse;
1485 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1486 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1487 goto discard_and_relse;
1490 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1491 goto discard_and_relse;
1493 tcp_v6_fill_cb(skb, hdr, th);
1495 if (tcp_v6_inbound_md5_hash(sk, skb))
1496 goto discard_and_relse;
1498 if (tcp_filter(sk, skb))
1499 goto discard_and_relse;
1500 th = (const struct tcphdr *)skb->data;
1501 hdr = ipv6_hdr(skb);
1505 if (sk->sk_state == TCP_LISTEN) {
1506 ret = tcp_v6_do_rcv(sk, skb);
1507 goto put_and_return;
1510 sk_incoming_cpu_update(sk);
1512 bh_lock_sock_nested(sk);
1513 tcp_segs_in(tcp_sk(sk), skb);
1515 if (!sock_owned_by_user(sk)) {
1516 if (!tcp_prequeue(sk, skb))
1517 ret = tcp_v6_do_rcv(sk, skb);
1518 } else if (tcp_add_backlog(sk, skb)) {
1519 goto discard_and_relse;
1526 return ret ? -1 : 0;
1529 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1532 tcp_v6_fill_cb(skb, hdr, th);
1534 if (tcp_checksum_complete(skb)) {
1536 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1538 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1540 tcp_v6_send_reset(NULL, skb);
1548 sk_drops_add(sk, skb);
1554 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1555 inet_twsk_put(inet_twsk(sk));
1559 tcp_v6_fill_cb(skb, hdr, th);
1561 if (tcp_checksum_complete(skb)) {
1562 inet_twsk_put(inet_twsk(sk));
1566 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1571 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1572 skb, __tcp_hdrlen(th),
1573 &ipv6_hdr(skb)->saddr, th->source,
1574 &ipv6_hdr(skb)->daddr,
1575 ntohs(th->dest), tcp_v6_iif(skb));
1577 struct inet_timewait_sock *tw = inet_twsk(sk);
1578 inet_twsk_deschedule_put(tw);
1580 tcp_v6_restore_cb(skb);
1584 /* Fall through to ACK */
1587 tcp_v6_timewait_ack(sk, skb);
1590 tcp_v6_restore_cb(skb);
1591 tcp_v6_send_reset(sk, skb);
1592 inet_twsk_deschedule_put(inet_twsk(sk));
1594 case TCP_TW_SUCCESS:
/* tcp_v6_early_demux - early socket lookup hook (installed as
 * tcpv6_protocol.early_demux).
 *
 * @skb: incoming packet
 *
 * Only PACKET_HOST packets whose TCP header can be pulled at the transport
 * offset, and whose th->doff is at least sizeof(struct tcphdr) / 4, are
 * considered.  An established socket is looked up with
 * __inet6_lookup_established() and skb->destructor is set to sock_edemux.
 * For a full socket the cached sk->sk_rx_dst is validated with dst_check()
 * against rx_dst_cookie, and it is attached to the skb without taking a
 * reference (skb_dst_set_noref) only when rx_dst_ifindex also matches
 * skb->skb_iif.
 *
 * NOTE(review): the embedded numbering skips lines here (early returns, the
 * assignment of th, the lookup-result test), so the exact guards are not
 * visible in this listing; confirm against the upstream file.
 */
1600 static void tcp_v6_early_demux(struct sk_buff *skb)
1602 const struct ipv6hdr *hdr;
1603 const struct tcphdr *th;
1606 if (skb->pkt_type != PACKET_HOST)
1609 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1612 hdr = ipv6_hdr(skb);
1615 if (th->doff < sizeof(struct tcphdr) / 4)
1618 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1619 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1620 &hdr->saddr, th->source,
1621 &hdr->daddr, ntohs(th->dest),
1625 skb->destructor = sock_edemux;
1626 if (sk_fullsock(sk)) {
1627 struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1630 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1632 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1633 skb_dst_set_noref(skb, dst);
/* Time-wait socket operations for TCPv6 (referenced from
 * tcpv6_prot.twsk_prot).  The object size is that of
 * struct tcp6_timewait_sock; the unique and destructor callbacks are the
 * generic tcp_twsk_unique and tcp_twsk_destructor.
 */
1638 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1639 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1640 .twsk_unique = tcp_twsk_unique,
1641 .twsk_destructor = tcp_twsk_destructor,
/* Connection-socket address-family operations for TCP over native IPv6.
 * tcp_v6_init_sock() installs this table as icsk->icsk_af_ops.
 *
 * Transmit, checksum, header rebuild, cached rx dst handling and MTU
 * reduction all use the inet6 / tcp_v6 variants; header lengths are those of
 * struct ipv6hdr and struct frag_hdr.  The compat sockopt hooks are only
 * present with CONFIG_COMPAT.
 */
1644 static const struct inet_connection_sock_af_ops ipv6_specific = {
1645 .queue_xmit = inet6_csk_xmit,
1646 .send_check = tcp_v6_send_check,
1647 .rebuild_header = inet6_sk_rebuild_header,
1648 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1649 .conn_request = tcp_v6_conn_request,
1650 .syn_recv_sock = tcp_v6_syn_recv_sock,
1651 .net_header_len = sizeof(struct ipv6hdr),
1652 .net_frag_header_len = sizeof(struct frag_hdr),
1653 .setsockopt = ipv6_setsockopt,
1654 .getsockopt = ipv6_getsockopt,
1655 .addr2sockaddr = inet6_csk_addr2sockaddr,
1656 .sockaddr_len = sizeof(struct sockaddr_in6),
1657 .bind_conflict = inet6_csk_bind_conflict,
1658 #ifdef CONFIG_COMPAT
1659 .compat_setsockopt = compat_ipv6_setsockopt,
1660 .compat_getsockopt = compat_ipv6_getsockopt,
1662 .mtu_reduced = tcp_v6_mtu_reduced,
/* TCP MD5 signature operations for native IPv6 sockets; built only with
 * CONFIG_TCP_MD5SIG.  tcp_v6_init_sock() installs this table as
 * tcp_sk(sk)->af_specific.  Lookup, hashing and key parsing all use the
 * tcp_v6 variants.
 */
1665 #ifdef CONFIG_TCP_MD5SIG
1666 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1667 .md5_lookup = tcp_v6_md5_lookup,
1668 .calc_md5_hash = tcp_v6_md5_hash_skb,
1669 .md5_parse = tcp_v6_parse_md5_keys,
1674 * TCP over IPv4 via INET6 API
/* Connection-socket address-family operations for IPv4 traffic carried on
 * an AF_INET6 socket.
 *
 * Transmit, checksum, header rebuild, cached rx dst handling and MTU
 * reduction use the IPv4 variants and net_header_len is that of
 * struct iphdr, while connection requests, socket options, address
 * conversion (struct sockaddr_in6) and bind-conflict checks keep the IPv6
 * variants.  No net_frag_header_len is set in the visible initialiser.
 */
1676 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1677 .queue_xmit = ip_queue_xmit,
1678 .send_check = tcp_v4_send_check,
1679 .rebuild_header = inet_sk_rebuild_header,
1680 .sk_rx_dst_set = inet_sk_rx_dst_set,
1681 .conn_request = tcp_v6_conn_request,
1682 .syn_recv_sock = tcp_v6_syn_recv_sock,
1683 .net_header_len = sizeof(struct iphdr),
1684 .setsockopt = ipv6_setsockopt,
1685 .getsockopt = ipv6_getsockopt,
1686 .addr2sockaddr = inet6_csk_addr2sockaddr,
1687 .sockaddr_len = sizeof(struct sockaddr_in6),
1688 .bind_conflict = inet6_csk_bind_conflict,
1689 #ifdef CONFIG_COMPAT
1690 .compat_setsockopt = compat_ipv6_setsockopt,
1691 .compat_getsockopt = compat_ipv6_getsockopt,
1693 .mtu_reduced = tcp_v4_mtu_reduced,
/* TCP MD5 signature operations for the IPv4-over-AF_INET6 case; built only
 * with CONFIG_TCP_MD5SIG.  Key lookup and hashing use the tcp_v4 variants,
 * while key parsing still goes through tcp_v6_parse_md5_keys.
 */
1696 #ifdef CONFIG_TCP_MD5SIG
1697 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1698 .md5_lookup = tcp_v4_md5_lookup,
1699 .calc_md5_hash = tcp_v4_md5_hash_skb,
1700 .md5_parse = tcp_v6_parse_md5_keys,
/* tcp_v6_init_sock - socket init hook (installed as tcpv6_prot.init).
 *
 * @sk: socket being initialised
 *
 * Installs ipv6_specific as the connection-socket af ops and, with
 * CONFIG_TCP_MD5SIG, tcp_sock_ipv6_specific as the MD5 af ops.
 *
 * NOTE(review): the embedded numbering skips lines here, so any further
 * initialisation and the return statement are not visible in this listing;
 * confirm against the upstream file.
 */
1704 /* NOTE: A lot of things set to zero explicitly by call to
1705 * sk_alloc() so need not be done here.
1707 static int tcp_v6_init_sock(struct sock *sk)
1709 struct inet_connection_sock *icsk = inet_csk(sk);
1713 icsk->icsk_af_ops = &ipv6_specific;
1715 #ifdef CONFIG_TCP_MD5SIG
1716 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* tcp_v6_destroy_sock - socket destroy hook (installed as
 * tcpv6_prot.destroy).
 *
 * @sk: socket being destroyed
 *
 * Runs the shared TCP teardown (tcp_v4_destroy_sock) first and the
 * IPv6-specific teardown (inet6_destroy_sock) second.
 */
1722 static void tcp_v6_destroy_sock(struct sock *sk)
1724 tcp_v4_destroy_sock(sk);
1725 inet6_destroy_sock(sk);
1728 #ifdef CONFIG_PROC_FS
1729 /* Proc filesystem TCPv6 sock list dumping. */
/* get_openreq6 - print one request socket as a row of the TCPv6 proc
 * listing.
 *
 * @seq: seq_file being filled
 * @req: request socket to describe
 * @i:   row number (NOTE(review): its use is not visible in this listing)
 *
 * ttd is the distance from jiffies to req->rsk_timer.expires and is printed
 * through jiffies_to_clock_t().  Addresses come from ir_v6_loc_addr and
 * ir_v6_rmt_addr as four 32-bit words each.  The remote port is converted
 * with ntohs() while ir_num is passed as is (presumably already in host
 * order; confirm).  The uid is that of the listener (req->rsk_listener),
 * mapped into the user namespace of the reader.  Queue sizes, the
 * non-standard timer and the inode are printed as constant zeroes.
 *
 * NOTE(review): the embedded numbering skips lines here (for example
 * 1736-1740), so any adjustment of ttd before printing is not visible;
 * confirm against the upstream file.
 */
1730 static void get_openreq6(struct seq_file *seq,
1731 const struct request_sock *req, int i)
1733 long ttd = req->rsk_timer.expires - jiffies;
1734 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1735 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1741 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1742 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1744 src->s6_addr32[0], src->s6_addr32[1],
1745 src->s6_addr32[2], src->s6_addr32[3],
1746 inet_rsk(req)->ir_num,
1747 dest->s6_addr32[0], dest->s6_addr32[1],
1748 dest->s6_addr32[2], dest->s6_addr32[3],
1749 ntohs(inet_rsk(req)->ir_rmt_port),
1751 0, 0, /* could print option size, but that is af dependent. */
1752 1, /* timers active (only the expire timer) */
1753 jiffies_to_clock_t(ttd),
1755 from_kuid_munged(seq_user_ns(seq),
1756 sock_i_uid(req->rsk_listener)),
1757 0, /* non standard timer */
1758 0, /* open_requests have no inode */
/* get_tcp6_sock - print one full TCPv6 socket as a row of the proc listing.
 *
 * @seq: seq_file being filled
 * @sp:  socket to describe
 * @i:   row number (NOTE(review): its use is not visible in this listing)
 *
 * Addresses are sk_v6_rcv_saddr and sk_v6_daddr; ports are converted with
 * ntohs().
 *
 * Timer selection: a pending retransmit, early-retransmit or loss-probe
 * timer and a pending zero-window probe timer both report
 * icsk->icsk_timeout; otherwise a pending sk_timer reports its expiry;
 * otherwise jiffies is used.  The value is printed as a delta from jiffies.
 *
 * Receive queue: for TCP_LISTEN the ack backlog is reported, otherwise
 * rcv_nxt - copied_seq clamped at zero, because the socket is read without
 * its lock.
 *
 * Last column: fastopenq->max_qlen for listeners, otherwise -1 while in
 * initial slow start, otherwise snd_ssthresh.
 *
 * NOTE(review): the embedded numbering skips lines here (for example the
 * timer-active codes and several printed arguments), so not every column is
 * visible in this listing; confirm against the upstream file.
 */
1762 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1764 const struct in6_addr *dest, *src;
1767 unsigned long timer_expires;
1768 const struct inet_sock *inet = inet_sk(sp);
1769 const struct tcp_sock *tp = tcp_sk(sp);
1770 const struct inet_connection_sock *icsk = inet_csk(sp);
1771 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1775 dest = &sp->sk_v6_daddr;
1776 src = &sp->sk_v6_rcv_saddr;
1777 destp = ntohs(inet->inet_dport);
1778 srcp = ntohs(inet->inet_sport);
1780 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1781 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
1782 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1784 timer_expires = icsk->icsk_timeout;
1785 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1787 timer_expires = icsk->icsk_timeout;
1788 } else if (timer_pending(&sp->sk_timer)) {
1790 timer_expires = sp->sk_timer.expires;
1793 timer_expires = jiffies;
1796 state = sk_state_load(sp);
1797 if (state == TCP_LISTEN)
1798 rx_queue = sp->sk_ack_backlog;
1800 /* Because we don't lock the socket,
1801 * we might find a transient negative value.
1803 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1806 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1807 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1809 src->s6_addr32[0], src->s6_addr32[1],
1810 src->s6_addr32[2], src->s6_addr32[3], srcp,
1811 dest->s6_addr32[0], dest->s6_addr32[1],
1812 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1814 tp->write_seq - tp->snd_una,
1817 jiffies_delta_to_clock_t(timer_expires - jiffies),
1818 icsk->icsk_retransmits,
1819 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1820 icsk->icsk_probes_out,
1822 atomic_read(&sp->sk_refcnt), sp,
1823 jiffies_to_clock_t(icsk->icsk_rto),
1824 jiffies_to_clock_t(icsk->icsk_ack.ato),
1825 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1827 state == TCP_LISTEN ?
1828 fastopenq->max_qlen :
1829 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
/* get_timewait6_sock - print one time-wait socket as a row of the TCPv6
 * proc listing.
 *
 * @seq: seq_file being filled
 * @tw:  time-wait socket to describe
 * @i:   row number (NOTE(review): its use is not visible in this listing)
 *
 * Addresses are tw_v6_rcv_saddr and tw_v6_daddr; ports are converted with
 * ntohs().  The state column is tw_substate, both queue columns are zero,
 * the timer code is the constant 3 and the remaining time is
 * tw_timer.expires - jiffies printed through jiffies_delta_to_clock_t().
 * Retransmits, uid, timeout and inode are printed as zero, followed by the
 * reference count and the socket pointer (%pK).
 */
1833 static void get_timewait6_sock(struct seq_file *seq,
1834 struct inet_timewait_sock *tw, int i)
1836 long delta = tw->tw_timer.expires - jiffies;
1837 const struct in6_addr *dest, *src;
1840 dest = &tw->tw_v6_daddr;
1841 src = &tw->tw_v6_rcv_saddr;
1842 destp = ntohs(tw->tw_dport);
1843 srcp = ntohs(tw->tw_sport);
1846 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1847 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1849 src->s6_addr32[0], src->s6_addr32[1],
1850 src->s6_addr32[2], src->s6_addr32[3], srcp,
1851 dest->s6_addr32[0], dest->s6_addr32[1],
1852 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1853 tw->tw_substate, 0, 0,
1854 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1855 atomic_read(&tw->tw_refcnt), tw);
/* tcp6_seq_show - seq_file show callback for the TCPv6 proc listing
 * (installed as tcp6_seq_afinfo.seq_ops.show in the visible initialiser).
 *
 * @seq: seq_file being filled
 * @v:   either SEQ_START_TOKEN, for which the column header is printed, or
 *       a socket pointer
 *
 * Sockets are dispatched on sk_state: TCP_TIME_WAIT entries go to
 * get_timewait6_sock(), TCP_NEW_SYN_RECV entries to get_openreq6() and
 * everything else to get_tcp6_sock().  st->num is passed as the row number.
 *
 * NOTE(review): the assignment of st and the return value are not visible
 * in this listing; confirm against the upstream file.
 */
1858 static int tcp6_seq_show(struct seq_file *seq, void *v)
1860 struct tcp_iter_state *st;
1861 struct sock *sk = v;
1863 if (v == SEQ_START_TOKEN) {
1868 "st tx_queue rx_queue tr tm->when retrnsmt"
1869 " uid timeout inode\n");
1874 if (sk->sk_state == TCP_TIME_WAIT)
1875 get_timewait6_sock(seq, v, st->num);
1876 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1877 get_openreq6(seq, v, st->num);
1879 get_tcp6_sock(seq, v, st->num);
/* File operations for the TCPv6 proc entry (referenced from
 * tcp6_seq_afinfo.seq_fops).  Opening goes through the shared tcp_seq_open;
 * seeking and release use the seq_file helpers seq_lseek and
 * seq_release_net.
 */
1884 static const struct file_operations tcp6_afinfo_seq_fops = {
1885 .owner = THIS_MODULE,
1886 .open = tcp_seq_open,
1888 .llseek = seq_lseek,
1889 .release = seq_release_net
/* Descriptor handed to tcp_proc_register() and tcp_proc_unregister() for
 * the TCPv6 proc entry: ties tcp6_afinfo_seq_fops to the tcp6_seq_show
 * callback.
 *
 * NOTE(review): the embedded numbering skips lines in this initialiser, so
 * further fields are not visible in this listing.
 */
1892 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1895 .seq_fops = &tcp6_afinfo_seq_fops,
1897 .show = tcp6_seq_show,
/* tcp6_proc_init - register the TCPv6 proc entry for a network namespace.
 *
 * @net: namespace to register in
 *
 * Return: the result of tcp_proc_register().
 */
1901 int __net_init tcp6_proc_init(struct net *net)
1903 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* tcp6_proc_exit - unregister the TCPv6 proc entry of a network namespace.
 *
 * @net: namespace to unregister from
 */
1906 void tcp6_proc_exit(struct net *net)
1908 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* Protocol descriptor for TCP over IPv6 (referenced from
 * tcpv6_protosw.prot).
 *
 * Most callbacks are the shared TCP / inet_csk implementations.  The
 * IPv6-specific ones are connect (tcp_v6_connect), init and destroy
 * (tcp_v6_init_sock, tcp_v6_destroy_sock) and backlog_rcv (tcp_v6_do_rcv).
 * Sockets are sized as struct tcp6_sock and allocated with
 * SLAB_DESTROY_BY_RCU.  Time-wait and request socket descriptors are
 * tcp6_timewait_sock_ops and tcp6_request_sock_ops, the hash tables are the
 * shared tcp_hashinfo, and no_autobind is set.  The compat sockopt hooks are
 * only present with CONFIG_COMPAT.
 *
 * NOTE(review): the embedded numbering skips lines in this initialiser
 * (1913, 1915, 1919, 1930), so some members are not visible in this
 * listing; confirm against the upstream file.
 */
1912 struct proto tcpv6_prot = {
1914 .owner = THIS_MODULE,
1916 .connect = tcp_v6_connect,
1917 .disconnect = tcp_disconnect,
1918 .accept = inet_csk_accept,
1920 .init = tcp_v6_init_sock,
1921 .destroy = tcp_v6_destroy_sock,
1922 .shutdown = tcp_shutdown,
1923 .setsockopt = tcp_setsockopt,
1924 .getsockopt = tcp_getsockopt,
1925 .recvmsg = tcp_recvmsg,
1926 .sendmsg = tcp_sendmsg,
1927 .sendpage = tcp_sendpage,
1928 .backlog_rcv = tcp_v6_do_rcv,
1929 .release_cb = tcp_release_cb,
1931 .unhash = inet_unhash,
1932 .get_port = inet_csk_get_port,
1933 .enter_memory_pressure = tcp_enter_memory_pressure,
1934 .stream_memory_free = tcp_stream_memory_free,
1935 .sockets_allocated = &tcp_sockets_allocated,
1936 .memory_allocated = &tcp_memory_allocated,
1937 .memory_pressure = &tcp_memory_pressure,
1938 .orphan_count = &tcp_orphan_count,
1939 .sysctl_mem = sysctl_tcp_mem,
1940 .sysctl_wmem = sysctl_tcp_wmem,
1941 .sysctl_rmem = sysctl_tcp_rmem,
1942 .max_header = MAX_TCP_HEADER,
1943 .obj_size = sizeof(struct tcp6_sock),
1944 .slab_flags = SLAB_DESTROY_BY_RCU,
1945 .twsk_prot = &tcp6_timewait_sock_ops,
1946 .rsk_prot = &tcp6_request_sock_ops,
1947 .h.hashinfo = &tcp_hashinfo,
1948 .no_autobind = true,
1949 #ifdef CONFIG_COMPAT
1950 .compat_setsockopt = compat_tcp_setsockopt,
1951 .compat_getsockopt = compat_tcp_getsockopt,
1953 .diag_destroy = tcp_abort,
/* IPv6 protocol handler table for TCP, registered for IPPROTO_TCP by
 * tcpv6_init() and removed by tcpv6_exit().  Hooks: tcp_v6_early_demux,
 * tcp_v6_rcv and tcp_v6_err; flags are INET6_PROTO_NOPOLICY and
 * INET6_PROTO_FINAL.
 */
1956 static const struct inet6_protocol tcpv6_protocol = {
1957 .early_demux = tcp_v6_early_demux,
1958 .handler = tcp_v6_rcv,
1959 .err_handler = tcp_v6_err,
1960 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-layer switch entry binding SOCK_STREAM / IPPROTO_TCP on the inet6
 * family to tcpv6_prot and inet6_stream_ops.  Registered by tcpv6_init() and
 * removed by tcpv6_exit().
 *
 * NOTE(review): the flags expression continues on a line that is not
 * visible in this listing; only INET_PROTOSW_PERMANENT can be confirmed
 * here.
 */
1963 static struct inet_protosw tcpv6_protosw = {
1964 .type = SOCK_STREAM,
1965 .protocol = IPPROTO_TCP,
1966 .prot = &tcpv6_prot,
1967 .ops = &inet6_stream_ops,
1968 .flags = INET_PROTOSW_PERMANENT |
/* tcpv6_net_init - per-namespace init (installed as tcpv6_net_ops.init).
 *
 * @net: namespace being set up
 *
 * Creates the control socket stored in net->ipv6.tcp_sk as a PF_INET6,
 * SOCK_RAW, IPPROTO_TCP socket.
 *
 * Return: the result of inet_ctl_sock_create().
 */
1972 static int __net_init tcpv6_net_init(struct net *net)
1974 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1975 SOCK_RAW, IPPROTO_TCP, net);
/* tcpv6_net_exit - per-namespace exit (installed as tcpv6_net_ops.exit).
 *
 * @net: namespace being torn down
 *
 * Destroys the control socket that tcpv6_net_init() stored in
 * net->ipv6.tcp_sk.
 */
1978 static void __net_exit tcpv6_net_exit(struct net *net)
1980 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
/* tcpv6_net_exit_batch - batched per-namespace exit (installed as
 * tcpv6_net_ops.exit_batch).
 *
 * @net_exit_list: list of namespaces being torn down; not referenced by the
 *                 visible call
 *
 * Purges AF_INET6 time-wait sockets from tcp_hashinfo / tcp_death_row.
 */
1983 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1985 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* Per-network-namespace operations for TCPv6, registered by tcpv6_init()
 * with register_pernet_subsys() and removed by tcpv6_exit().
 */
1988 static struct pernet_operations tcpv6_net_ops = {
1989 .init = tcpv6_net_init,
1990 .exit = tcpv6_net_exit,
1991 .exit_batch = tcpv6_net_exit_batch,
/* tcpv6_init - register TCP with the IPv6 stack.
 *
 * Registration order: the IPPROTO_TCP protocol handler (tcpv6_protocol),
 * the socket-layer switch entry (tcpv6_protosw), then the per-namespace
 * operations (tcpv6_net_ops).  A failure in a later step jumps to a label
 * named after the earlier step (out_tcpv6_protocol, out_tcpv6_protosw); the
 * visible unwind calls undo the protosw registration before the protocol
 * registration.
 *
 * Return: NOTE(review): the return statements are not visible in this
 * listing; presumably ret from the failing step, or 0 on success - confirm
 * against the upstream file.
 */
1994 int __init tcpv6_init(void)
1998 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2002 /* register inet6 protocol */
2003 ret = inet6_register_protosw(&tcpv6_protosw);
2005 goto out_tcpv6_protocol;
2007 ret = register_pernet_subsys(&tcpv6_net_ops);
2009 goto out_tcpv6_protosw;
2014 inet6_unregister_protosw(&tcpv6_protosw);
2016 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
/* tcpv6_exit - unregister TCP from the IPv6 stack.
 *
 * Removes, in the reverse of the order used by tcpv6_init(): the
 * per-namespace operations, the socket-layer switch entry and finally the
 * IPPROTO_TCP protocol handler.
 */
2020 void tcpv6_exit(void)
2022 unregister_pernet_subsys(&tcpv6_net_ops);
2023 inet6_unregister_protosw(&tcpv6_protosw);
2024 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);