/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
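
/*
 * ip6_finish_output2 - final transmit step for a single, already-sized
 * IPv6 packet: resolve (or create) the neighbour entry for the route's
 * next hop and hand the skb to the neighbour output path. Multicast
 * destinations are looped back to local listeners and accounted first.
 */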
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
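
/*
 * ip6_finish_output - decide between direct transmit and fragmentation:
 * oversized non-GSO packets, routes that require fragmentation on all
 * packets, and conntrack-defragmented packets whose recorded
 * frag_max_size is exceeded all take the fragmentation path.
 */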
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
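
/*
 * ip6_output - NF_INET_POST_ROUTING entry point for outgoing packets.
 * Drops everything when IPv6 is administratively disabled on the
 * egress device.
 */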
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
						     ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
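
/*
 * ip6_call_ra_chain - deliver a Router Alert packet to every raw socket
 * registered for this alert value (honouring device binding). Returns
 * nonzero when the packet was consumed by at least one socket.
 */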
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
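
/*
 * ip6_forward_proxy_check - decide what to do with a packet destined to
 * a proxied (NDP proxy) address: 1 = deliver locally, 0 = forward,
 * -1 = drop (a link-local destination cannot be proxied).
 */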
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();
	return mtu;
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}
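
/*
 * ip6_forward - the forwarding path proper: policy checks, Router Alert
 * delivery, hop limit handling, NDP proxy handling, redirect generation,
 * MTU check, and finally the NF_INET_FORWARD hook.
 */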
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
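
/*
 * ip6_fragment - fragment an IPv6 packet at the source.
 *
 * Two strategies: a fast path that reuses an existing frag_list (each
 * fragment already has the right geometry, so only headers need to be
 * prepended), and a slow path that allocates a fresh skb per fragment
 * and copies the payload into it.
 */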
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		 * then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
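
/*
 * ip6_rt_check - true when the cached route can no longer be trusted for
 * this flow address: the route key is not a matching /128 host entry and
 * the address also disagrees with the socket's cached address.
 */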
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
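
/*
 * ip6_dst_lookup_tail - common tail of the dst lookup helpers: pick a
 * source address when the caller left it unspecified, perform the route
 * lookup, and (with optimistic DAD) fall back to the default router's
 * dst entry while the next hop neighbour is still incomplete.
 */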
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@net: network namespace
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);

	return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
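
/*
 * ip6_ufo_append_data - build one large skb for UDP fragmentation
 * offload instead of fragmenting in software; the gso_size set here
 * tells the device to cut 8-byte-aligned fragments.
 */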
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int exthdrlen, int transhdrlen, int mtu,
			unsigned int flags, const struct flowi6 *fl6)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_set_network_header(skb, exthdrlen);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
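
/*
 * ip6_setup_cork - initialise cork state for a corked send: deep-copy
 * the tx options (they may change while data is pending), pin the
 * route, and derive the fragment size from path MTU and socket
 * settings.
 */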
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above (miyazawa) */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
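
/*
 * __ip6_append_data - append user data to the pending queue, growing the
 * tail skb, adding page fragments, or starting a new fragment-sized skb
 * as needed; the actual headers are only built in __ip6_make_skb().
 */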
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				  sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM)
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if ((skb && skb_is_gso(skb)) ||
	    (((length + (skb ? skb->len : headersize)) > mtu) &&
	    (skb_queue_len(queue) <= 1) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen, exthdrlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
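
/*
 * ip6_append_data - queue data for transmission on a corked socket.
 *
 * A minimal sketch of the expected calling pattern (illustrative only;
 * the getfrag callback and the prepared flow/route belong to the
 * caller, not this file):
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, 0, hlimit, tclass,
 *			      opt, &fl6, rt, MSG_MORE, dontfrag);
 *	if (!err)
 *		err = ip6_push_pending_frames(sk);
 */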
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
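
/*
 * __ip6_make_skb - collapse the pending queue into one skb (any extra
 * skbs become its frag_list), push the extension headers, and fill in
 * the IPv6 header from the corked flow state.
 */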
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
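
/*
 * ip6_send_skb - hand a finished skb to ip6_local_out() and account
 * drops against the route's idev.
 */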
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}

int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
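
/*
 * ip6_make_skb - uncorked (single-shot) variant: set up a private cork,
 * append all data in one call, and return the finished skb without
 * touching sk->sk_write_queue.
 */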
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}