2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : airthmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
/*
 * ip6_finish_output2() - final IPv6 transmit step: resolve the neighbour
 * entry for the route's nexthop and hand the skb to neigh_output().
 * NOTE(review): extracted listing — original kernel line numbers retained
 * and some lines elided; this edit adds comments only.
 */
62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
/* Multicast destination: a clone may be looped back through POST_ROUTING
 * when a local listener or the multicast-routing socket should see it.
 */
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
/* A multicast packet with hop_limit 0 is discarded and accounted. */
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS)
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
/* A lightweight tunnel may take over transmission entirely. */
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
109 if (res != LWTUNNEL_XMIT_CONTINUE)
/* Unicast path: look up the neighbour cache entry for the nexthop,
 * creating one if missing, then transmit via neigh_output().
 */
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
124 rcu_read_unlock_bh();
/* No usable neighbour entry: count as a "no route" discard. */
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
/*
 * ip6_finish_output_gso_slowpath_drop() - software-segment an oversized
 * GSO skb (GSO features masked off), then send each resulting segment,
 * fragmenting only segments that still exceed @mtu.
 * NOTE(review): extracted listing — some original lines elided;
 * comments only.
 */
132 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
133 struct sk_buff *skb, unsigned int mtu)
135 struct sk_buff *segs, *nskb;
136 netdev_features_t features;
139 /* Please see corresponding comment in ip_finish_output_gso
140 * describing the cases where GSO segment length exceeds the
143 features = netif_skb_features(skb);
144 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
145 if (IS_ERR_OR_NULL(segs)) {
/* Walk the segment list; each segment is unlinked before sending. */
152 skb_list_walk_safe(segs, segs, nskb) {
155 skb_mark_not_on_list(segs);
156 /* Last GSO segment can be smaller than gso_size (and MTU).
157 * Adding a fragment header would produce an "atomic fragment",
158 * which is considered harmful (RFC-8021). Avoid that.
160 err = segs->len > mtu ?
161 ip6_fragment(net, sk, segs, ip6_finish_output2) :
162 ip6_finish_output2(net, sk, segs);
/*
 * ip6_finish_output() - post-POSTROUTING output step: run the cgroup BPF
 * egress program, re-route if an xfrm policy attached after SNAT, and
 * fragment (or GSO-slow-path) packets exceeding the path MTU.
 * NOTE(review): extracted listing — some original lines elided;
 * comments only.
 */
170 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
175 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
181 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
182 /* Policy lookup after SNAT yielded a new policy */
183 if (skb_dst(skb)->xfrm) {
184 IP6CB(skb)->flags |= IP6SKB_REROUTED;
185 return dst_output(net, sk, skb);
189 mtu = ip6_skb_dst_mtu(skb);
/* Oversized GSO packets take the software-segmentation slow path. */
190 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
191 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
/* Fragment when the packet exceeds the MTU, the dst demands
 * all-fragments, or conntrack recorded a smaller frag_max_size.
 */
193 if ((skb->len > mtu && !skb_is_gso(skb)) ||
194 dst_allfrag(skb_dst(skb)) ||
195 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
196 return ip6_fragment(net, sk, skb, ip6_finish_output2);
198 return ip6_finish_output2(net, sk, skb);
/*
 * ip6_output() - dst_output entry point for IPv6: drops the packet when
 * IPv6 is administratively disabled on the egress device, otherwise
 * passes it through the NF_INET_POST_ROUTING hook (skipped when the skb
 * was already rerouted, per IP6SKB_REROUTED).
 * NOTE(review): extracted listing — some original lines elided;
 * comments only.
 */
201 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
203 struct net_device *dev = skb_dst(skb)->dev;
204 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
206 skb->protocol = htons(ETH_P_IPV6);
209 if (unlikely(idev->cnf.disable_ipv6)) {
210 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
215 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
216 net, sk, skb, NULL, dev,
218 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
221 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
223 if (!np->autoflowlabel_set)
224 return ip6_default_np_autolabel(net);
226 return np->autoflowlabel;
230 * xmit an sk_buff (used by TCP, SCTP and DCCP)
231 * Note : socket lock is not held for SYNACK packets, but might be modified
232 * by calls to skb_set_owner_w() and ipv6_local_error(),
233 * which are using proper atomic operations or spinlocks.
/*
 * ip6_xmit() - transmit a locally generated packet for connection-
 * oriented protocols (TCP/SCTP/DCCP): reallocates headroom if needed,
 * pushes extension headers and the IPv6 header, then either sends via
 * the NF_INET_LOCAL_OUT hook or reports EMSGSIZE when over the MTU.
 * NOTE(review): extracted listing — original kernel line numbers
 * retained and some lines elided; comments only.
 */
235 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
236 __u32 mark, struct ipv6_txoptions *opt, int tclass)
238 struct net *net = sock_net(sk);
239 const struct ipv6_pinfo *np = inet6_sk(sk);
240 struct in6_addr *first_hop = &fl6->daddr;
241 struct dst_entry *dst = skb_dst(skb);
242 unsigned int head_room;
244 u8 proto = fl6->flowi6_proto;
245 int seg_len = skb->len;
/* Headroom must cover the IPv6 header, link-layer space and any
 * extension headers carried in @opt.
 */
249 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
251 head_room += opt->opt_nflen + opt->opt_flen;
253 if (unlikely(skb_headroom(skb) < head_room)) {
254 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
256 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
257 IPSTATS_MIB_OUTDISCARDS);
262 skb_set_owner_w(skb2, skb->sk);
/* Push fragmentable then non-fragmentable extension headers;
 * first_hop may be rewritten by a routing header.
 */
268 seg_len += opt->opt_nflen + opt->opt_flen;
271 ipv6_push_frag_opts(skb, opt, &proto);
274 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
278 skb_push(skb, sizeof(struct ipv6hdr));
279 skb_reset_network_header(skb);
283 * Fill in the IPv6 header
286 hlimit = np->hop_limit;
288 hlimit = ip6_dst_hoplimit(dst);
290 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
291 ip6_autoflowlabel(net, np), fl6));
293 hdr->payload_len = htons(seg_len);
294 hdr->nexthdr = proto;
295 hdr->hop_limit = hlimit;
297 hdr->saddr = fl6->saddr;
298 hdr->daddr = *first_hop;
300 skb->protocol = htons(ETH_P_IPV6);
301 skb->priority = sk->sk_priority;
/* Within the MTU (or df ignored / GSO): account and hand to netfilter. */
305 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
306 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
307 IPSTATS_MIB_OUT, skb->len);
309 /* if egress device is enslaved to an L3 master device pass the
310 * skb to its handler for processing
312 skb = l3mdev_ip6_out((struct sock *)sk, skb);
316 /* hooks should never assume socket lock is held.
317 * we promote our socket to non const
319 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
320 net, (struct sock *)sk, skb, NULL, dst->dev,
/* Over the MTU: signal EMSGSIZE to the local sender, count as
 * a fragmentation failure.
 */
325 /* ipv6_local_error() does not require socket lock,
326 * we promote our socket to non const
328 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
330 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
334 EXPORT_SYMBOL(ip6_xmit);
/*
 * ip6_call_ra_chain() - deliver a Router Alert packet to every raw
 * socket registered on ip6_ra_chain for selector @sel (respecting
 * device binding); each earlier match gets a clone, the last match
 * consumes the original skb.
 * NOTE(review): extracted listing — some original lines elided;
 * comments only.
 */
336 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
338 struct ip6_ra_chain *ra;
339 struct sock *last = NULL;
341 read_lock(&ip6_ra_lock);
342 for (ra = ip6_ra_chain; ra; ra = ra->next) {
343 struct sock *sk = ra->sk;
344 if (sk && ra->sel == sel &&
345 (!sk->sk_bound_dev_if ||
346 sk->sk_bound_dev_if == skb->dev->ifindex)) {
/* Previous match receives a clone so 'last' can take the original. */
348 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
350 rawv6_rcv(last, skb2);
357 rawv6_rcv(last, skb);
358 read_unlock(&ip6_ra_lock);
361 read_unlock(&ip6_ra_lock);
/*
 * ip6_forward_proxy_check() - decide how a packet destined to a proxied
 * (pneigh) address is handled: skips extension headers, special-cases
 * NDISC ICMPv6 messages, and rejects link-local destinations.
 * NOTE(review): extracted listing — some original lines elided
 * (including return statements); comments only.
 */
365 static int ip6_forward_proxy_check(struct sk_buff *skb)
367 struct ipv6hdr *hdr = ipv6_hdr(skb);
368 u8 nexthdr = hdr->nexthdr;
/* Skip over any extension headers to find the upper-layer protocol. */
372 if (ipv6_ext_hdr(nexthdr)) {
373 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
377 offset = sizeof(struct ipv6hdr);
379 if (nexthdr == IPPROTO_ICMPV6) {
380 struct icmp6hdr *icmp6;
/* Ensure at least the ICMPv6 type octet is in the linear area. */
382 if (!pskb_may_pull(skb, (skb_network_header(skb) +
383 offset + 1 - skb->data)))
386 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
388 switch (icmp6->icmp6_type) {
389 case NDISC_ROUTER_SOLICITATION:
390 case NDISC_ROUTER_ADVERTISEMENT:
391 case NDISC_NEIGHBOUR_SOLICITATION:
392 case NDISC_NEIGHBOUR_ADVERTISEMENT:
394 /* For reaction involving unicast neighbor discovery
395 * message destined to the proxied address, pass it to
405 * The proxying router can't forward traffic sent to a link-local
406 * address, so signal the sender and discard the packet. This
407 * behavior is clarified by the MIPv6 specification.
409 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
410 dst_link_failure(skb);
/*
 * ip6_forward_finish() - final step of the forward path after the
 * NF_INET_FORWARD hook: bump forwarding counters and hand the packet
 * to dst_output().
 * NOTE(review): extracted listing — some original lines elided
 * (including the skb parameter line); comments only.
 */
417 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
420 struct dst_entry *dst = skb_dst(skb);
422 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
423 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
426 return dst_output(net, sk, skb);
/*
 * ip6_pkt_too_big() - report whether @skb exceeds @mtu on the forward
 * path, honouring the conntrack-defrag frag_max_size and treating GSO
 * packets whose segments fit as acceptable.
 * NOTE(review): extracted listing — the return statements for each
 * branch are elided; comments only.
 */
429 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
434 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
435 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
441 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
/*
 * ip6_forward() - forward a received IPv6 packet out the route's device:
 * validates forwarding policy and packet type, handles Router Alert,
 * checks/decrements the hop limit, may emit Redirect and Packet Too Big
 * ICMPv6 errors, then passes through the NF_INET_FORWARD hook to
 * ip6_forward_finish().
 * NOTE(review): extracted listing — original kernel line numbers
 * retained and many lines elided (goto targets, braces); comments only.
 */
447 int ip6_forward(struct sk_buff *skb)
449 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
450 struct dst_entry *dst = skb_dst(skb);
451 struct ipv6hdr *hdr = ipv6_hdr(skb);
452 struct inet6_skb_parm *opt = IP6CB(skb);
453 struct net *net = dev_net(dst->dev);
/* Early bail-outs: forwarding disabled, non-host packets, locally
 * owned skbs, or LRO-coalesced frames that must not be forwarded.
 */
456 if (net->ipv6.devconf_all->forwarding == 0)
459 if (skb->pkt_type != PACKET_HOST)
462 if (unlikely(skb->sk))
465 if (skb_warn_if_lro(skb))
/* XFRM forward-policy check, unless policy checks are disabled. */
468 if (!net->ipv6.devconf_all->disable_policy &&
469 (!idev || !idev->cnf.disable_policy) &&
470 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
471 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
475 skb_forward_csum(skb);
478 * We DO NOT make any processing on
479 * RA packets, pushing them to user level AS IS
480 * without ane WARRANTY that application will be able
481 * to interpret them. The reason is that we
482 * cannot make anything clever here.
484 * We are not end-node, so that if packet contains
485 * AH/ESP, we cannot make anything.
486 * Defragmentation also would be mistake, RA packets
487 * cannot be fragmented, because there is no warranty
488 * that different fragments will go along one path. --ANK
490 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
491 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
496 * check and decrement ttl
498 if (hdr->hop_limit <= 1) {
499 /* Force OUTPUT device used as source address */
501 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
502 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
508 /* XXX: idev->cnf.proxy_ndp? */
509 if (net->ipv6.devconf_all->proxy_ndp &&
510 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
511 int proxied = ip6_forward_proxy_check(skb);
513 return ip6_input(skb);
514 else if (proxied < 0) {
515 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
520 if (!xfrm6_route_forward(skb)) {
521 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
526 /* IPv6 specs say nothing about it, but it is clear that we cannot
527 send redirects to source routed frames.
528 We don't send redirects to frames decapsulated from IPsec.
530 if (IP6CB(skb)->iif == dst->dev->ifindex &&
531 opt->srcrt == 0 && !skb_sec_path(skb)) {
532 struct in6_addr *target = NULL;
533 struct inet_peer *peer;
537 * incoming and outgoing devices are the same
541 rt = (struct rt6_info *) dst;
542 if (rt->rt6i_flags & RTF_GATEWAY)
543 target = &rt->rt6i_gateway;
545 target = &hdr->daddr;
547 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
549 /* Limit redirects both by destination (here)
550 and by source (inside ndisc_send_redirect)
552 if (inet_peer_xrlim_allow(peer, 1*HZ))
553 ndisc_send_redirect(skb, target);
557 int addrtype = ipv6_addr_type(&hdr->saddr);
559 /* This check is security critical. */
560 if (addrtype == IPV6_ADDR_ANY ||
561 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
563 if (addrtype & IPV6_ADDR_LINKLOCAL) {
564 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
565 ICMPV6_NOT_NEIGHBOUR, 0);
/* MTU check: forwarded packets are never fragmented here; oversize
 * packets trigger an ICMPv6 Packet Too Big back to the sender.
 */
570 mtu = ip6_dst_mtu_forward(dst);
571 if (mtu < IPV6_MIN_MTU)
574 if (ip6_pkt_too_big(skb, mtu)) {
575 /* Again, force OUTPUT device used as source address */
577 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
578 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
579 __IP6_INC_STATS(net, ip6_dst_idev(dst),
580 IPSTATS_MIB_FRAGFAILS);
585 if (skb_cow(skb, dst->dev->hard_header_len)) {
586 __IP6_INC_STATS(net, ip6_dst_idev(dst),
587 IPSTATS_MIB_OUTDISCARDS);
593 /* Mangling hops number delayed to point after skb COW */
597 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
598 net, NULL, skb, skb->dev, dst->dev,
602 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
/*
 * ip6_copy_metadata() - copy per-packet metadata (packet type, priority,
 * protocol, dst reference, mark, hash, tc index, secmark) from @from to
 * @to; used when building fragments so each carries the original's
 * metadata.
 * NOTE(review): extracted listing — some original lines elided
 * (e.g. the closing #endif); comments only.
 */
608 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
610 to->pkt_type = from->pkt_type;
611 to->priority = from->priority;
612 to->protocol = from->protocol;
/* Each fragment takes its own reference on the shared dst. */
614 skb_dst_set(to, dst_clone(skb_dst(from)));
616 to->mark = from->mark;
618 skb_copy_hash(to, from);
620 #ifdef CONFIG_NET_SCHED
621 to->tc_index = from->tc_index;
624 skb_copy_secmark(to, from);
/*
 * ip6_fragment() - split @skb into MTU-sized IPv6 fragments and send
 * each through @output. Uses a fast path that reuses an existing
 * frag_list when its geometry already matches, otherwise a slow path
 * that allocates and copies each fragment.
 * NOTE(review): extracted listing — original kernel line numbers
 * retained and many lines elided (labels, braces, assignments);
 * comments only.
 */
627 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
628 int (*output)(struct net *, struct sock *, struct sk_buff *))
630 struct sk_buff *frag;
631 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
632 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
633 inet6_sk(skb->sk) : NULL;
634 struct ipv6hdr *tmp_hdr;
636 unsigned int mtu, hlen, left, len, nexthdr_offset;
639 int ptr, offset = 0, err = 0;
640 u8 *prevhdr, nexthdr = 0;
/* Locate the first header that can be followed by a Fragment header. */
642 err = ip6_find_1stfragopt(skb, &prevhdr);
647 nexthdr_offset = prevhdr - skb_network_header(skb);
649 mtu = ip6_skb_dst_mtu(skb);
651 /* We must not fragment if the socket is set to force MTU discovery
652 * or if the skb it not generated by a local socket.
654 if (unlikely(!skb->ignore_df && skb->len > mtu))
657 if (IP6CB(skb)->frag_max_size) {
658 if (IP6CB(skb)->frag_max_size > mtu)
661 /* don't send fragments larger than what we received */
662 mtu = IP6CB(skb)->frag_max_size;
663 if (mtu < IPV6_MIN_MTU)
667 if (np && np->frag_size < mtu) {
671 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
673 mtu -= hlen + sizeof(struct frag_hdr);
/* One Identification value is shared by all fragments of this packet. */
675 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
676 &ipv6_hdr(skb)->saddr);
678 if (skb->ip_summed == CHECKSUM_PARTIAL &&
679 (err = skb_checksum_help(skb)))
682 prevhdr = skb_network_header(skb) + nexthdr_offset;
683 hroom = LL_RESERVED_SPACE(rt->dst.dev);
/* Fast path: reuse the existing frag_list as the fragment chain when
 * every member already has suitable size, alignment and headroom.
 */
684 if (skb_has_frag_list(skb)) {
685 unsigned int first_len = skb_pagelen(skb);
686 struct sk_buff *frag2;
688 if (first_len - hlen > mtu ||
689 ((first_len - hlen) & 7) ||
691 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
694 skb_walk_frags(skb, frag) {
695 /* Correct geometry. */
696 if (frag->len > mtu ||
697 ((frag->len & 7) && frag->next) ||
698 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
699 goto slow_path_clean;
701 /* Partially cloned skb? */
702 if (skb_shared(frag))
703 goto slow_path_clean;
708 frag->destructor = sock_wfree;
710 skb->truesize -= frag->truesize;
717 *prevhdr = NEXTHDR_FRAGMENT;
718 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
723 frag = skb_shinfo(skb)->frag_list;
724 skb_frag_list_init(skb);
726 __skb_pull(skb, hlen);
727 fh = __skb_push(skb, sizeof(struct frag_hdr));
728 __skb_push(skb, hlen);
729 skb_reset_network_header(skb);
730 memcpy(skb_network_header(skb), tmp_hdr, hlen);
732 fh->nexthdr = nexthdr;
734 fh->frag_off = htons(IP6_MF);
735 fh->identification = frag_id;
737 first_len = skb_pagelen(skb);
738 skb->data_len = first_len - skb_headlen(skb);
739 skb->len = first_len;
740 ipv6_hdr(skb)->payload_len = htons(first_len -
741 sizeof(struct ipv6hdr));
743 /* We prevent @rt from being freed. */
747 /* Prepare header of the next frame,
748 * before previous one went down. */
750 frag->ip_summed = CHECKSUM_NONE;
751 skb_reset_transport_header(frag);
752 fh = __skb_push(frag, sizeof(struct frag_hdr));
753 __skb_push(frag, hlen);
754 skb_reset_network_header(frag);
755 memcpy(skb_network_header(frag), tmp_hdr,
757 offset += skb->len - hlen - sizeof(struct frag_hdr);
758 fh->nexthdr = nexthdr;
760 fh->frag_off = htons(offset);
762 fh->frag_off |= htons(IP6_MF);
763 fh->identification = frag_id;
764 ipv6_hdr(frag)->payload_len =
766 sizeof(struct ipv6hdr));
767 ip6_copy_metadata(frag, skb);
770 err = output(net, sk, skb);
772 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
773 IPSTATS_MIB_FRAGCREATES);
786 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
787 IPSTATS_MIB_FRAGOKS);
792 kfree_skb_list(frag);
794 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
795 IPSTATS_MIB_FRAGFAILS);
/* Undo the ownership changes made before falling back to slow path. */
800 skb_walk_frags(skb, frag2) {
804 frag2->destructor = NULL;
805 skb->truesize += frag2->truesize;
/* Slow path: allocate a fresh skb per fragment and copy the data. */
810 left = skb->len - hlen; /* Space per frame */
811 ptr = hlen; /* Where to start from */
814 * Fragment the datagram.
817 troom = rt->dst.dev->needed_tailroom;
820 * Keep copying data until we run out.
823 u8 *fragnexthdr_offset;
826 /* IF: it doesn't fit, use 'mtu' - the data space left */
829 /* IF: we are not sending up to and including the packet end
830 then align the next start on an eight byte boundary */
835 /* Allocate buffer */
836 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
837 hroom + troom, GFP_ATOMIC);
844 * Set up data on packet
847 ip6_copy_metadata(frag, skb);
848 skb_reserve(frag, hroom);
849 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
850 skb_reset_network_header(frag);
851 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
852 frag->transport_header = (frag->network_header + hlen +
853 sizeof(struct frag_hdr));
856 * Charge the memory for the fragment to any owner
860 skb_set_owner_w(frag, skb->sk);
863 * Copy the packet header into the new buffer.
865 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
867 fragnexthdr_offset = skb_network_header(frag);
868 fragnexthdr_offset += prevhdr - skb_network_header(skb);
869 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
872 * Build fragment header.
874 fh->nexthdr = nexthdr;
876 fh->identification = frag_id;
879 * Copy a block of the IP datagram.
881 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
885 fh->frag_off = htons(offset);
887 fh->frag_off |= htons(IP6_MF);
888 ipv6_hdr(frag)->payload_len = htons(frag->len -
889 sizeof(struct ipv6hdr));
895 * Put this fragment into the sending queue.
897 err = output(net, sk, frag);
901 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
902 IPSTATS_MIB_FRAGCREATES);
904 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
905 IPSTATS_MIB_FRAGOKS);
910 if (skb->sk && dst_allfrag(skb_dst(skb)))
911 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
913 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
917 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
918 IPSTATS_MIB_FRAGFAILS);
923 static inline int ip6_rt_check(const struct rt6key *rt_key,
924 const struct in6_addr *fl_addr,
925 const struct in6_addr *addr_cache)
927 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
928 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
/*
 * ip6_sk_dst_check() - validate the socket's cached dst against @fl6:
 * rejects non-IPv6 dsts and dsts whose destination/source route keys or
 * output interface no longer match the flow.
 * NOTE(review): extracted listing — some original lines elided
 * (including the return paths); comments only.
 */
931 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
932 struct dst_entry *dst,
933 const struct flowi6 *fl6)
935 struct ipv6_pinfo *np = inet6_sk(sk);
/* IPv6 sockets can carry a mapped-IPv4 dst; such a dst is not usable
 * here and is rejected.
 */
941 if (dst->ops->family != AF_INET6) {
946 rt = (struct rt6_info *)dst;
947 /* Yes, checking route validity in not connected
948 * case is not very simple. Take into account,
949 * that we do not support routing by source, TOS,
950 * and MSG_DONTROUTE --ANK (980726)
952 * 1. ip6_rt_check(): If route was host route,
953 * check that cached destination is current.
954 * If it is network route, we still may
955 * check its validity using saved pointer
956 * to the last used address: daddr_cache.
957 * We do not want to save whole address now,
958 * (because main consumer of this service
959 * is tcp, which has not this problem),
960 * so that the last trick works only on connected
962 * 2. oif also should be the same.
964 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
965 #ifdef CONFIG_IPV6_SUBTREES
966 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
968 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
969 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
/*
 * ip6_dst_lookup_tail() - core of the output route lookup: resolves a
 * source address when the flow has none, performs the routing lookup,
 * and (with optimistic DAD) may redo the lookup towards the default
 * router when the nexthop neighbour is INCOMPLETE and the source
 * address is optimistic.
 * NOTE(review): extracted listing — original kernel line numbers
 * retained and many lines elided; comments only.
 */
978 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
979 struct dst_entry **dst, struct flowi6 *fl6)
981 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
988 /* The correct way to handle this would be to do
989 * ip6_route_get_saddr, and then ip6_route_output; however,
990 * the route-specific preferred source forces the
991 * ip6_route_output call _before_ ip6_route_get_saddr.
993 * In source specific routing (no src=any default route),
994 * ip6_route_output will fail given src=any saddr, though, so
995 * that's why we try it again later.
997 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
998 struct fib6_info *from;
1000 bool had_dst = *dst != NULL;
1003 *dst = ip6_route_output(net, sk, fl6);
1004 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1007 from = rt ? rcu_dereference(rt->from) : NULL;
1008 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1009 sk ? inet6_sk(sk)->srcprefs : 0,
1014 goto out_err_release;
1016 /* If we had an erroneous initial result, pretend it
1017 * never existed and let the SA-enabled version take
1020 if (!had_dst && (*dst)->error) {
1025 if (fl6->flowi6_oif)
1026 flags |= RT6_LOOKUP_F_IFACE;
1030 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1032 err = (*dst)->error;
1034 goto out_err_release;
1036 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1038 * Here if the dst entry we've looked up
1039 * has a neighbour entry that is in the INCOMPLETE
1040 * state and the src address from the flow is
1041 * marked as OPTIMISTIC, we release the found
1042 * dst entry and replace it instead with the
1043 * dst entry of the nexthop router
1045 rt = (struct rt6_info *) *dst;
1047 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1048 rt6_nexthop(rt, &fl6->daddr));
1049 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1050 rcu_read_unlock_bh();
1053 struct inet6_ifaddr *ifp;
1054 struct flowi6 fl_gw6;
1057 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1060 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1066 * We need to get the dst entry for the
1067 * default router instead
1070 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1071 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1072 *dst = ip6_route_output(net, sk, &fl_gw6);
1073 err = (*dst)->error;
1075 goto out_err_release;
/* A v4-mapped source with a non-mapped destination is unsupported. */
1079 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1080 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1081 err = -EAFNOSUPPORT;
1082 goto out_err_release;
1091 if (err == -ENETUNREACH)
1092 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1097 * ip6_dst_lookup - perform route lookup on flow
1098 * @sk: socket which provides route info
1099 * @dst: pointer to dst_entry * for result
1100 * @fl6: flow to lookup
1102 * This function performs a route lookup on the given flow.
1104 * It returns zero on success, or a standard errno code on error.
/* Thin public wrapper around ip6_dst_lookup_tail(); the result dst is
 * returned through @dst.
 */
1106 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1110 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1112 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1115 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1116 * @sk: socket which provides route info
1117 * @fl6: flow to lookup
1118 * @final_dst: final destination address for ipsec lookup
1120 * This function performs a route lookup on the given flow.
1122 * It returns a valid dst pointer on success, or a pointer encoded
/* Performs the base lookup, rewrites the flow's daddr to @final_dst
 * when given, then resolves the xfrm (IPsec) route on top of it.
 */
1125 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1126 const struct in6_addr *final_dst)
1128 struct dst_entry *dst = NULL;
1131 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1133 return ERR_PTR(err);
1135 fl6->daddr = *final_dst;
1137 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1139 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1142 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1143 * @sk: socket which provides the dst cache and route info
1144 * @fl6: flow to lookup
1145 * @final_dst: final destination address for ipsec lookup
1146 * @connected: whether @sk is connected or not
1148 * This function performs a route lookup on the given flow with the
1149 * possibility of using the cached route in the socket if it is valid.
1150 * It will take the socket dst lock when operating on the dst cache.
1151 * As a result, this function can only be used in process context.
1153 * In addition, for a connected socket, cache the dst in the socket
1154 * if the current cache is not valid.
1156 * It returns a valid dst pointer on success, or a pointer encoded
1159 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1160 const struct in6_addr *final_dst,
/* Try the socket's cached dst first; fall back to a fresh lookup and,
 * for connected sockets, store the new dst back into the socket.
 */
1163 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1165 dst = ip6_sk_dst_check(sk, dst, fl6);
1169 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1170 if (connected && !IS_ERR(dst))
1171 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1175 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1177 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1180 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1183 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1186 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/*
 * ip6_append_data_mtu() - recompute *mtu and *maxfraglen while appending
 * data: outside an XFRM tunnel the first fragment reserves header_len,
 * later fragments treat that space as data.
 * NOTE(review): extracted listing — some original lines elided
 * (including the maxfraglen parameter line); comments only.
 */
1189 static void ip6_append_data_mtu(unsigned int *mtu,
1191 unsigned int fragheaderlen,
1192 struct sk_buff *skb,
1193 struct rt6_info *rt,
1194 unsigned int orig_mtu)
1196 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1198 /* first fragment, reserve header_len */
1199 *mtu = orig_mtu - rt->dst.header_len;
1203 * this fragment is not first, the headers
1204 * space is regarded as data space.
/* maxfraglen: largest 8-byte-aligned payload plus headers, minus the
 * Fragment header that each fragment must carry.
 */
1208 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1209 + fragheaderlen - sizeof(struct frag_hdr);
/*
 * ip6_setup_cork() - initialise the cork state for ip6_append_data():
 * deep-copies the tx options (each extension header duplicated), stores
 * the route, flow, hop limit and traffic class, and computes the
 * effective fragment size from PMTU settings.
 * NOTE(review): extracted listing — original kernel line numbers
 * retained and some lines elided (error paths); comments only.
 */
1213 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1214 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1215 struct rt6_info *rt, struct flowi6 *fl6)
1217 struct ipv6_pinfo *np = inet6_sk(sk);
1219 struct ipv6_txoptions *opt = ipc6->opt;
/* The cork must not already hold options from a previous setup. */
1225 if (WARN_ON(v6_cork->opt))
1228 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1229 if (unlikely(!v6_cork->opt))
1232 v6_cork->opt->tot_len = sizeof(*opt);
1233 v6_cork->opt->opt_flen = opt->opt_flen;
1234 v6_cork->opt->opt_nflen = opt->opt_nflen;
/* Deep-copy each extension header; a failed kmemdup with a non-NULL
 * source is an allocation error.
 */
1236 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1238 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1241 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1243 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1246 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1248 if (opt->hopopt && !v6_cork->opt->hopopt)
1251 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1253 if (opt->srcrt && !v6_cork->opt->srcrt)
1256 /* need source address above miyazawa*/
1259 cork->base.dst = &rt->dst;
1260 cork->fl.u.ip6 = *fl6;
1261 v6_cork->hop_limit = ipc6->hlimit;
1262 v6_cork->tclass = ipc6->tclass;
/* MTU selection: with PMTUDISC_PROBE the raw device MTU is used,
 * otherwise the dst (or xfrm path dst) MTU applies.
 */
1263 if (rt->dst.flags & DST_XFRM_TUNNEL)
1264 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1265 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1267 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1268 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1269 if (np->frag_size < mtu) {
1271 mtu = np->frag_size;
1273 cork->base.fragsize = mtu;
1274 cork->base.gso_size = ipc6->gso_size;
1275 cork->base.tx_flags = 0;
1276 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1278 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1279 cork->base.flags |= IPCORK_ALLFRAG;
1280 cork->base.length = 0;
1282 cork->base.transmit_time = ipc6->sockc.transmit_time;
1287 static int __ip6_append_data(struct sock *sk,
1289 struct sk_buff_head *queue,
1290 struct inet_cork *cork,
1291 struct inet6_cork *v6_cork,
1292 struct page_frag *pfrag,
1293 int getfrag(void *from, char *to, int offset,
1294 int len, int odd, struct sk_buff *skb),
1295 void *from, int length, int transhdrlen,
1296 unsigned int flags, struct ipcm6_cookie *ipc6)
1298 struct sk_buff *skb, *skb_prev = NULL;
1299 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1301 int dst_exthdrlen = 0;
1307 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1308 struct ipv6_txoptions *opt = v6_cork->opt;
1309 int csummode = CHECKSUM_NONE;
1310 unsigned int maxnonfragsize, headersize;
1311 unsigned int wmem_alloc_delta = 0;
1314 skb = skb_peek_tail(queue);
1316 exthdrlen = opt ? opt->opt_flen : 0;
1317 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1320 paged = !!cork->gso_size;
1321 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1324 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1325 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1326 tskey = sk->sk_tskey++;
1328 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1330 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1331 (opt ? opt->opt_nflen : 0);
1333 headersize = sizeof(struct ipv6hdr) +
1334 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1335 (dst_allfrag(&rt->dst) ?
1336 sizeof(struct frag_hdr) : 0) +
1337 rt->rt6i_nfheader_len;
1339 if (mtu <= fragheaderlen ||
1340 ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1343 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1344 sizeof(struct frag_hdr);
1346 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1347 * the first fragment
1349 if (headersize + transhdrlen > mtu)
1352 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1353 (sk->sk_protocol == IPPROTO_UDP ||
1354 sk->sk_protocol == IPPROTO_RAW)) {
1355 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1356 sizeof(struct ipv6hdr));
1360 if (ip6_sk_ignore_df(sk))
1361 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1363 maxnonfragsize = mtu;
1365 if (cork->length + length > maxnonfragsize - headersize) {
1367 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1368 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1372 /* CHECKSUM_PARTIAL only with no extension headers and when
1373 * we are not going to fragment
1375 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1376 headersize == sizeof(struct ipv6hdr) &&
1377 length <= mtu - headersize &&
1378 (!(flags & MSG_MORE) || cork->gso_size) &&
1379 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1380 csummode = CHECKSUM_PARTIAL;
1383 * Let's try using as much space as possible.
1384 * Use MTU if total length of the message fits into the MTU.
1385 * Otherwise, we need to reserve fragment header and
1386 * fragment alignment (= 8-15 octects, in total).
1388 * Note that we may need to "move" the data from the tail of
1389 * of the buffer to the new fragment when we split
1392 * FIXME: It may be fragmented into multiple chunks
1393 * at once if non-fragmentable extension headers
1398 cork->length += length;
1402 while (length > 0) {
1403 /* Check if the remaining data fits into current packet. */
1404 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1406 copy = maxfraglen - skb->len;
1410 unsigned int datalen;
1411 unsigned int fraglen;
1412 unsigned int fraggap;
1413 unsigned int alloclen, alloc_extra;
1414 unsigned int pagedlen;
1416 /* There's no room in the current skb */
1418 fraggap = skb->len - maxfraglen;
1421 /* update mtu and maxfraglen if necessary */
1422 if (!skb || !skb_prev)
1423 ip6_append_data_mtu(&mtu, &maxfraglen,
1424 fragheaderlen, skb, rt,
1430 * If remaining data exceeds the mtu,
1431 * we know we need more fragment(s).
1433 datalen = length + fraggap;
1435 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1436 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1437 fraglen = datalen + fragheaderlen;
1440 alloc_extra = hh_len;
1441 alloc_extra += dst_exthdrlen;
1442 alloc_extra += rt->dst.trailer_len;
1444 /* We just reserve space for fragment header.
1445 * Note: this may be overallocation if the message
1446 * (without MSG_MORE) fits into the MTU.
1448 alloc_extra += sizeof(struct frag_hdr);
1450 if ((flags & MSG_MORE) &&
1451 !(rt->dst.dev->features&NETIF_F_SG))
1454 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1455 !(rt->dst.dev->features & NETIF_F_SG)))
1458 alloclen = min_t(int, fraglen, MAX_HEADER);
1459 pagedlen = fraglen - alloclen;
1461 alloclen += alloc_extra;
1463 if (datalen != length + fraggap) {
1465 * this is not the last fragment, the trailer
1466 * space is regarded as data space.
1468 datalen += rt->dst.trailer_len;
1471 fraglen = datalen + fragheaderlen;
1473 copy = datalen - transhdrlen - fraggap - pagedlen;
1479 skb = sock_alloc_send_skb(sk, alloclen,
1480 (flags & MSG_DONTWAIT), &err);
1483 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1485 skb = alloc_skb(alloclen,
1493 * Fill in the control structures
1495 skb->protocol = htons(ETH_P_IPV6);
1496 skb->ip_summed = csummode;
1498 /* reserve for fragmentation and ipsec header */
1499 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1502 /* Only the initial fragment is time stamped */
1503 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1505 skb_shinfo(skb)->tskey = tskey;
1509 * Find where to start putting bytes
1511 data = skb_put(skb, fraglen - pagedlen);
1512 skb_set_network_header(skb, exthdrlen);
1513 data += fragheaderlen;
1514 skb->transport_header = (skb->network_header +
1517 skb->csum = skb_copy_and_csum_bits(
1518 skb_prev, maxfraglen,
1519 data + transhdrlen, fraggap, 0);
1520 skb_prev->csum = csum_sub(skb_prev->csum,
1523 pskb_trim_unique(skb_prev, maxfraglen);
1526 getfrag(from, data + transhdrlen, offset,
1527 copy, fraggap, skb) < 0) {
1534 length -= copy + transhdrlen;
1539 if ((flags & MSG_CONFIRM) && !skb_prev)
1540 skb_set_dst_pending_confirm(skb, 1);
1543 * Put the packet on the pending queue
1545 if (!skb->destructor) {
1546 skb->destructor = sock_wfree;
1548 wmem_alloc_delta += skb->truesize;
1550 __skb_queue_tail(queue, skb);
1557 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1558 skb_tailroom(skb) >= copy) {
1562 if (getfrag(from, skb_put(skb, copy),
1563 offset, copy, off, skb) < 0) {
1564 __skb_trim(skb, off);
1569 int i = skb_shinfo(skb)->nr_frags;
1572 if (!sk_page_frag_refill(sk, pfrag))
1575 if (!skb_can_coalesce(skb, i, pfrag->page,
1578 if (i == MAX_SKB_FRAGS)
1581 __skb_fill_page_desc(skb, i, pfrag->page,
1583 skb_shinfo(skb)->nr_frags = ++i;
1584 get_page(pfrag->page);
1586 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1588 page_address(pfrag->page) + pfrag->offset,
1589 offset, copy, skb->len, skb) < 0)
1592 pfrag->offset += copy;
1593 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1595 skb->data_len += copy;
1596 skb->truesize += copy;
1597 wmem_alloc_delta += copy;
1603 if (wmem_alloc_delta)
1604 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1610 cork->length -= length;
1611 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1612 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
/*
 * ip6_append_data - queue user data on sk->sk_write_queue so it can
 * later be sent as a single IPv6 datagram (ip6_push_pending_frames()).
 *
 * On the first call (write queue empty) the cork is initialized via
 * ip6_setup_cork() and any destination-option length (opt_flen) is
 * folded into both @length and @transhdrlen; on subsequent calls the
 * flow label info stored in the cork is reused instead of @fl6.
 *
 * NOTE(review): this listing is elided -- the declarations of
 * err/exthdrlen, the MSG_PROBE early return body and the
 * ip6_setup_cork() error branch are not visible here; the comments
 * below describe only what the visible lines show.
 */
1616 int ip6_append_data(struct sock *sk,
1617 int getfrag(void *from, char *to, int offset, int len,
1618 int odd, struct sk_buff *skb),
1619 void *from, int length, int transhdrlen,
1620 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1621 struct rt6_info *rt, unsigned int flags)
1623 struct inet_sock *inet = inet_sk(sk);
1624 struct ipv6_pinfo *np = inet6_sk(sk);
     /* MSG_PROBE: path probing only, nothing is queued (body elided). */
1628 if (flags&MSG_PROBE)
     /* First fragment of a new corked datagram: set up the cork state. */
1630 if (skb_queue_empty(&sk->sk_write_queue)) {
1634 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
     /* Account the IPv6 destination options in both totals. */
1639 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1640 length += exthdrlen;
1641 transhdrlen += exthdrlen;
     /* Datagram already corked: use the flow stored with the cork. */
1643 fl6 = &inet->cork.fl.u.ip6;
1647 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1648 &np->cork, sk_page_frag(sk), getfrag,
1649 from, length, transhdrlen, flags, ipc6);
1651 EXPORT_SYMBOL_GPL(ip6_append_data);
/*
 * ip6_cork_release - free all state held by a cork once the pending
 * datagram has been sent or flushed.
 *
 * Frees each sub-option of the corked IPv6 tx options, drops the
 * cached dst reference, clears the ALLFRAG flag and wipes the stored
 * flow.  NOTE(review): the listing is elided -- the "if (v6_cork->opt)"
 * guard around the kfree() calls is not visible here.
 */
1653 static void ip6_cork_release(struct inet_cork_full *cork,
1654 struct inet6_cork *v6_cork)
     /* Each extension-header option block was allocated separately. */
1657 kfree(v6_cork->opt->dst0opt);
1658 kfree(v6_cork->opt->dst1opt);
1659 kfree(v6_cork->opt->hopopt);
1660 kfree(v6_cork->opt->srcrt);
1661 kfree(v6_cork->opt);
1662 v6_cork->opt = NULL;
     /* Drop the route cached for this corked datagram, if any. */
1665 if (cork->base.dst) {
1666 dst_release(cork->base.dst);
1667 cork->base.dst = NULL;
1668 cork->base.flags &= ~IPCORK_ALLFRAG;
1670 memset(&cork->fl, 0, sizeof(cork->fl));
/*
 * __ip6_make_skb - collapse the queued fragments of a corked datagram
 * into one sk_buff and prepend the IPv6 header (and any extension
 * headers), ready for transmission.
 *
 * The first skb on @queue becomes the head; every further skb is
 * chained onto its frag_list, with len/data_len/truesize accumulated
 * on the head and destructors cleared so only the head is accounted.
 * Finally the cork state is released.
 *
 * NOTE(review): elided listing -- the NULL check after __skb_dequeue(),
 * some braces and the final "return skb;" are not visible here.
 */
1673 struct sk_buff *__ip6_make_skb(struct sock *sk,
1674 struct sk_buff_head *queue,
1675 struct inet_cork_full *cork,
1676 struct inet6_cork *v6_cork)
1678 struct sk_buff *skb, *tmp_skb;
1679 struct sk_buff **tail_skb;
1680 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1681 struct ipv6_pinfo *np = inet6_sk(sk);
1682 struct net *net = sock_net(sk);
1683 struct ipv6hdr *hdr;
1684 struct ipv6_txoptions *opt = v6_cork->opt;
1685 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1686 struct flowi6 *fl6 = &cork->fl.u.ip6;
1687 unsigned char proto = fl6->flowi6_proto;
     /* Head skb; the rest of the queue is chained below it. */
1689 skb = __skb_dequeue(queue);
1692 tail_skb = &(skb_shinfo(skb)->frag_list);
1694 /* move skb->data to ip header from ext header */
1695 if (skb->data < skb_network_header(skb))
1696 __skb_pull(skb, skb_network_offset(skb));
1697 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1698 __skb_pull(tmp_skb, skb_network_header_len(skb));
1699 *tail_skb = tmp_skb;
1700 tail_skb = &(tmp_skb->next);
1701 skb->len += tmp_skb->len;
1702 skb->data_len += tmp_skb->len;
1703 skb->truesize += tmp_skb->truesize;
     /* Memory is now accounted on the head skb only. */
1704 tmp_skb->destructor = NULL;
1708 /* Allow local fragmentation. */
1709 skb->ignore_df = ip6_sk_ignore_df(sk);
     /* Keep the flow's daddr: routing-header pushes may rewrite it. */
1711 *final_dst = fl6->daddr;
1712 __skb_pull(skb, skb_network_header_len(skb));
1713 if (opt && opt->opt_flen)
1714 ipv6_push_frag_opts(skb, opt, &proto);
1715 if (opt && opt->opt_nflen)
1716 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
     /* Prepend and fill the fixed IPv6 header. */
1718 skb_push(skb, sizeof(struct ipv6hdr));
1719 skb_reset_network_header(skb);
1720 hdr = ipv6_hdr(skb);
1722 ip6_flow_hdr(hdr, v6_cork->tclass,
1723 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1724 ip6_autoflowlabel(net, np), fl6));
1725 hdr->hop_limit = v6_cork->hop_limit;
1726 hdr->nexthdr = proto;
1727 hdr->saddr = fl6->saddr;
1728 hdr->daddr = *final_dst;
1730 skb->priority = sk->sk_priority;
1731 skb->mark = sk->sk_mark;
1733 skb->tstamp = cork->base.transmit_time;
1735 skb_dst_set(skb, dst_clone(&rt->dst));
1736 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
     /* ICMPv6 gets per-message-type counters on top of the MIB. */
1737 if (proto == IPPROTO_ICMPV6) {
1738 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
     /* Raw sockets without IPV6_HDRINCL carry the type in the flow;
      * otherwise read it from the packet itself. */
1741 if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
1742 icmp6_type = fl6->fl6_icmp_type;
1744 icmp6_type = icmp6_hdr(skb)->icmp6_type;
1745 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
1746 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1749 ip6_cork_release(cork, v6_cork);
/*
 * ip6_send_skb - hand a fully built datagram to the IPv6 local output
 * path.  On a non-zero result from ip6_local_out() the error is
 * normalized via net_xmit_errno() and, presumably only on a hard
 * error, OUTDISCARDS is bumped (the branch structure is elided here --
 * NOTE(review): confirm against the full source).
 */
1754 int ip6_send_skb(struct sk_buff *skb)
1756 struct net *net = sock_net(skb->sk);
1757 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1760 err = ip6_local_out(net, skb->sk, skb);
     /* Map NET_XMIT_* congestion codes to errno-style values. */
1763 err = net_xmit_errno(err);
1765 IP6_INC_STATS(net, rt->rt6i_idev,
1766 IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - finalize the datagram queued on
 * sk->sk_write_queue (via ip6_append_data()) and transmit it.
 * NOTE(review): the NULL check on the skb returned by
 * ip6_finish_skb() is elided in this listing.
 */
1772 int ip6_push_pending_frames(struct sock *sk)
1774 struct sk_buff *skb;
1776 skb = ip6_finish_skb(sk);
1780 return ip6_send_skb(skb);
1782 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
/*
 * __ip6_flush_pending_frames - discard every skb queued for a corked
 * datagram, counting each as an OUTDISCARD, then release the cork.
 * NOTE(review): the kfree_skb() call inside the loop is elided in
 * this listing.
 */
1784 static void __ip6_flush_pending_frames(struct sock *sk,
1785 struct sk_buff_head *queue,
1786 struct inet_cork_full *cork,
1787 struct inet6_cork *v6_cork)
1789 struct sk_buff *skb;
1791 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1793 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1794 IPSTATS_MIB_OUTDISCARDS);
1798 ip6_cork_release(cork, v6_cork);
/*
 * ip6_flush_pending_frames - public wrapper: flush the socket's own
 * write queue and cork state (the per-socket variant of
 * __ip6_flush_pending_frames()).
 */
1801 void ip6_flush_pending_frames(struct sock *sk)
1803 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1804 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1806 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/*
 * ip6_make_skb - build a complete IPv6 datagram in one shot, without
 * corking it on the socket: initialize a caller-provided cork, append
 * the user data into a private queue, then collapse it with
 * __ip6_make_skb().  Returns the skb or an ERR_PTR on failure.
 *
 * Fix: "¤t->task_frag" was mojibake -- the HTML entity for "&curr"
 * ("&curren;") had replaced the start of "&current->task_frag",
 * which would not compile.  Restored the intended token.
 *
 * NOTE(review): this listing is elided -- the MSG_PROBE early return
 * body, the ip6_setup_cork() error test and the closing brace are not
 * visible here; only the mojibake token was changed.
 */
1808 struct sk_buff *ip6_make_skb(struct sock *sk,
1809 int getfrag(void *from, char *to, int offset,
1810 int len, int odd, struct sk_buff *skb),
1811 void *from, int length, int transhdrlen,
1812 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1813 struct rt6_info *rt, unsigned int flags,
1814 struct inet_cork_full *cork)
1816 struct inet6_cork v6_cork;
1817 struct sk_buff_head queue;
1818 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1821 if (flags & MSG_PROBE)
     /* Private queue: nothing touches sk->sk_write_queue here. */
1824 __skb_queue_head_init(&queue);
1826 cork->base.flags = 0;
1827 cork->base.addr = 0;
1828 cork->base.opt = NULL;
1829 cork->base.dst = NULL;
1831 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1833 ip6_cork_release(cork, &v6_cork);
1834 return ERR_PTR(err);
1836 if (ipc6->dontfrag < 0)
1837 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1839 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1840 &current->task_frag, getfrag, from,
1841 length + exthdrlen, transhdrlen + exthdrlen,
     /* On append failure, drop everything queued so far. */
1844 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1845 return ERR_PTR(err);
1848 return __ip6_make_skb(sk, &queue, cork, &v6_cork);