2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
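/*
 * ip6_finish_output2() is the last step of the output path: it loops
 * multicast packets back locally when required, hands the skb to a
 * lightweight tunnel if one is attached to the route, or resolves the
 * nexthop neighbour and queues the packet on the device via
 * neigh_output().
 */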
62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS);
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
124 rcu_read_unlock_bh();
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
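/*
 * Software-segment an oversized GSO skb and push each resulting segment
 * through ip6_fragment(); used when the GSO size no longer fits the
 * path MTU (e.g. after the route changed).
 */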
132 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
133 struct sk_buff *skb, unsigned int mtu)
135 struct sk_buff *segs, *nskb;
136 netdev_features_t features;
139 /* Please see corresponding comment in ip_finish_output_gso
140 * describing the cases where GSO segment length exceeds the
143 features = netif_skb_features(skb);
144 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
145 if (IS_ERR_OR_NULL(segs)) {
152 skb_list_walk_safe(segs, segs, nskb) {
155 skb_mark_not_on_list(segs);
156 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
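/*
 * Post-routing finish step: run the cgroup BPF egress hook, re-route via
 * dst_output() if an XFRM policy appeared after SNAT, and fragment (or
 * take the GSO slow path) when the packet exceeds the path MTU before
 * handing it to ip6_finish_output2().
 */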
164 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
169 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
175 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
176 /* Policy lookup after SNAT yielded a new policy */
177 if (skb_dst(skb)->xfrm) {
178 IPCB(skb)->flags |= IPSKB_REROUTED;
179 return dst_output(net, sk, skb);
183 mtu = ip6_skb_dst_mtu(skb);
184 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
185 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
187 if ((skb->len > mtu && !skb_is_gso(skb)) ||
188 dst_allfrag(skb_dst(skb)) ||
189 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
190 return ip6_fragment(net, sk, skb, ip6_finish_output2);
192 return ip6_finish_output2(net, sk, skb);
195 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
197 struct net_device *dev = skb_dst(skb)->dev;
198 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
200 skb->protocol = htons(ETH_P_IPV6);
203 if (unlikely(idev->cnf.disable_ipv6)) {
204 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
209 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
210 net, sk, skb, NULL, dev,
212 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
215 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
217 if (!np->autoflowlabel_set)
218 return ip6_default_np_autolabel(net);
220 return np->autoflowlabel;
224 * xmit an sk_buff (used by TCP, SCTP and DCCP)
225 * Note : socket lock is not held for SYNACK packets, but might be modified
226 * by calls to skb_set_owner_w() and ipv6_local_error(),
227 * which are using proper atomic operations or spinlocks.
229 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 __u32 mark, struct ipv6_txoptions *opt, int tclass)
232 struct net *net = sock_net(sk);
233 const struct ipv6_pinfo *np = inet6_sk(sk);
234 struct in6_addr *first_hop = &fl6->daddr;
235 struct dst_entry *dst = skb_dst(skb);
236 unsigned int head_room;
238 u8 proto = fl6->flowi6_proto;
239 int seg_len = skb->len;
243 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
245 head_room += opt->opt_nflen + opt->opt_flen;
247 if (unlikely(skb_headroom(skb) < head_room)) {
248 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
250 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
251 IPSTATS_MIB_OUTDISCARDS);
256 skb_set_owner_w(skb2, skb->sk);
262 seg_len += opt->opt_nflen + opt->opt_flen;
265 ipv6_push_frag_opts(skb, opt, &proto);
268 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
272 skb_push(skb, sizeof(struct ipv6hdr));
273 skb_reset_network_header(skb);
277 * Fill in the IPv6 header
280 hlimit = np->hop_limit;
282 hlimit = ip6_dst_hoplimit(dst);
284 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
285 ip6_autoflowlabel(net, np), fl6));
287 hdr->payload_len = htons(seg_len);
288 hdr->nexthdr = proto;
289 hdr->hop_limit = hlimit;
291 hdr->saddr = fl6->saddr;
292 hdr->daddr = *first_hop;
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->priority = sk->sk_priority;
299 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
300 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
301 IPSTATS_MIB_OUT, skb->len);
303 /* if egress device is enslaved to an L3 master device pass the
304 * skb to its handler for processing
306 skb = l3mdev_ip6_out((struct sock *)sk, skb);
310 /* hooks should never assume socket lock is held.
311 * we promote our socket to non const
313 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
314 net, (struct sock *)sk, skb, NULL, dst->dev,
319 /* ipv6_local_error() does not require socket lock,
320 * we promote our socket to non const
322 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
324 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
328 EXPORT_SYMBOL(ip6_xmit);
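/*
 * Deliver a Router Alert packet to every raw socket that registered the
 * matching IPV6_ROUTER_ALERT value; returns nonzero when the packet was
 * handed over (and thus consumed) by at least one listener.
 */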
330 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332 struct ip6_ra_chain *ra;
333 struct sock *last = NULL;
335 read_lock(&ip6_ra_lock);
336 for (ra = ip6_ra_chain; ra; ra = ra->next) {
337 struct sock *sk = ra->sk;
338 if (sk && ra->sel == sel &&
339 (!sk->sk_bound_dev_if ||
340 sk->sk_bound_dev_if == skb->dev->ifindex)) {
342 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344 rawv6_rcv(last, skb2);
351 rawv6_rcv(last, skb);
352 read_unlock(&ip6_ra_lock);
355 read_unlock(&ip6_ra_lock);
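/*
 * Decide what to do with a packet whose destination we proxy (NDP proxy):
 * returns >0 to deliver it locally (unicast neighbour discovery aimed at
 * the proxied address), 0 to forward it, and <0 to drop it (e.g. a
 * link-local destination we cannot forward).
 */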
359 static int ip6_forward_proxy_check(struct sk_buff *skb)
361 struct ipv6hdr *hdr = ipv6_hdr(skb);
362 u8 nexthdr = hdr->nexthdr;
366 if (ipv6_ext_hdr(nexthdr)) {
367 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
371 offset = sizeof(struct ipv6hdr);
373 if (nexthdr == IPPROTO_ICMPV6) {
374 struct icmp6hdr *icmp6;
376 if (!pskb_may_pull(skb, (skb_network_header(skb) +
377 offset + 1 - skb->data)))
380 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
382 switch (icmp6->icmp6_type) {
383 case NDISC_ROUTER_SOLICITATION:
384 case NDISC_ROUTER_ADVERTISEMENT:
385 case NDISC_NEIGHBOUR_SOLICITATION:
386 case NDISC_NEIGHBOUR_ADVERTISEMENT:
388 /* For reaction involving unicast neighbor discovery
389 * message destined to the proxied address, pass it to
399 * The proxying router can't forward traffic sent to a link-local
400 * address, so signal the sender and discard the packet. This
401 * behavior is clarified by the MIPv6 specification.
403 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
404 dst_link_failure(skb);
411 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
414 struct dst_entry *dst = skb_dst(skb);
416 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
417 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
420 return dst_output(net, sk, skb);
423 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
428 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
429 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
435 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
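/*
 * Forwarding path proper: check that forwarding is enabled and the packet
 * really is transit traffic, honour Router Alert, decrement the hop
 * limit, optionally hand proxied-neighbour traffic back to local input,
 * emit redirects when the packet leaves on the interface it arrived on,
 * and enforce the outgoing path MTU before the NF_INET_FORWARD hook.
 */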
441 int ip6_forward(struct sk_buff *skb)
443 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
444 struct dst_entry *dst = skb_dst(skb);
445 struct ipv6hdr *hdr = ipv6_hdr(skb);
446 struct inet6_skb_parm *opt = IP6CB(skb);
447 struct net *net = dev_net(dst->dev);
450 if (net->ipv6.devconf_all->forwarding == 0)
453 if (skb->pkt_type != PACKET_HOST)
456 if (unlikely(skb->sk))
459 if (skb_warn_if_lro(skb))
462 if (!net->ipv6.devconf_all->disable_policy &&
463 !idev->cnf.disable_policy &&
464 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
465 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
469 skb_forward_csum(skb);
472 * We DO NOT do any processing on
473 * RA packets, pushing them to user level AS IS
474 * without any WARRANTY that the application will be able
475 * to interpret them. The reason is that we
476 * cannot do anything clever here.
478 * We are not an end-node, so if the packet contains
479 * AH/ESP, we cannot do anything.
480 * Defragmentation would also be a mistake; RA packets
481 * cannot be fragmented, because there is no guarantee
482 * that different fragments will follow the same path. --ANK
484 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
485 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
490 * check and decrement hop limit
492 if (hdr->hop_limit <= 1) {
493 /* Force OUTPUT device used as source address */
495 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
496 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
502 /* XXX: idev->cnf.proxy_ndp? */
503 if (net->ipv6.devconf_all->proxy_ndp &&
504 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
505 int proxied = ip6_forward_proxy_check(skb);
507 return ip6_input(skb);
508 else if (proxied < 0) {
509 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
514 if (!xfrm6_route_forward(skb)) {
515 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
520 /* IPv6 specs say nothing about it, but it is clear that we cannot
521 send redirects to source routed frames.
522 We don't send redirects to frames decapsulated from IPsec.
524 if (IP6CB(skb)->iif == dst->dev->ifindex &&
525 opt->srcrt == 0 && !skb_sec_path(skb)) {
526 struct in6_addr *target = NULL;
527 struct inet_peer *peer;
531 * incoming and outgoing devices are the same
535 rt = (struct rt6_info *) dst;
536 if (rt->rt6i_flags & RTF_GATEWAY)
537 target = &rt->rt6i_gateway;
539 target = &hdr->daddr;
541 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
543 /* Limit redirects both by destination (here)
544 and by source (inside ndisc_send_redirect)
546 if (inet_peer_xrlim_allow(peer, 1*HZ))
547 ndisc_send_redirect(skb, target);
551 int addrtype = ipv6_addr_type(&hdr->saddr);
553 /* This check is security critical. */
554 if (addrtype == IPV6_ADDR_ANY ||
555 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
557 if (addrtype & IPV6_ADDR_LINKLOCAL) {
558 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
559 ICMPV6_NOT_NEIGHBOUR, 0);
564 mtu = ip6_dst_mtu_forward(dst);
565 if (mtu < IPV6_MIN_MTU)
568 if (ip6_pkt_too_big(skb, mtu)) {
569 /* Again, force OUTPUT device used as source address */
571 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
572 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
573 __IP6_INC_STATS(net, ip6_dst_idev(dst),
574 IPSTATS_MIB_FRAGFAILS);
579 if (skb_cow(skb, dst->dev->hard_header_len)) {
580 __IP6_INC_STATS(net, ip6_dst_idev(dst),
581 IPSTATS_MIB_OUTDISCARDS);
587 /* Decrementing the hop limit is delayed until after the skb COW */
591 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
592 net, NULL, skb, skb->dev, dst->dev,
596 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
602 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
604 to->pkt_type = from->pkt_type;
605 to->priority = from->priority;
606 to->protocol = from->protocol;
608 skb_dst_set(to, dst_clone(skb_dst(from)));
610 to->mark = from->mark;
612 skb_copy_hash(to, from);
614 #ifdef CONFIG_NET_SCHED
615 to->tc_index = from->tc_index;
618 skb_copy_secmark(to, from);
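/*
 * ip6_fragment() emits the packet as a chain of fragments. When the skb
 * already carries a well-formed frag_list (correct geometry, not shared)
 * the list members are reused as the fragments (fast path); otherwise the
 * slow path below allocates and copies each fragment from scratch.
 */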
621 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
622 int (*output)(struct net *, struct sock *, struct sk_buff *))
624 struct sk_buff *frag;
625 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
626 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
627 inet6_sk(skb->sk) : NULL;
628 struct ipv6hdr *tmp_hdr;
630 unsigned int mtu, hlen, left, len, nexthdr_offset;
633 int ptr, offset = 0, err = 0;
634 u8 *prevhdr, nexthdr = 0;
636 err = ip6_find_1stfragopt(skb, &prevhdr);
641 nexthdr_offset = prevhdr - skb_network_header(skb);
643 mtu = ip6_skb_dst_mtu(skb);
645 /* We must not fragment if the socket is set to force MTU discovery
646 * or if the skb is not generated by a local socket.
648 if (unlikely(!skb->ignore_df && skb->len > mtu))
651 if (IP6CB(skb)->frag_max_size) {
652 if (IP6CB(skb)->frag_max_size > mtu)
655 /* don't send fragments larger than what we received */
656 mtu = IP6CB(skb)->frag_max_size;
657 if (mtu < IPV6_MIN_MTU)
661 if (np && np->frag_size < mtu) {
665 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
667 mtu -= hlen + sizeof(struct frag_hdr);
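/*
 * From here on, 'mtu' is the payload budget of one fragment, i.e. the
 * link MTU minus the unfragmentable headers and the fragment header.
 * Illustrative example: with a 1500-byte MTU and a plain 40-byte IPv6
 * header, mtu becomes 1500 - 40 - 8 = 1452, and the slow path later
 * rounds non-final fragments down to a multiple of 8 (1448 bytes).
 */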
669 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
670 &ipv6_hdr(skb)->saddr);
672 if (skb->ip_summed == CHECKSUM_PARTIAL &&
673 (err = skb_checksum_help(skb)))
676 prevhdr = skb_network_header(skb) + nexthdr_offset;
677 hroom = LL_RESERVED_SPACE(rt->dst.dev);
678 if (skb_has_frag_list(skb)) {
679 unsigned int first_len = skb_pagelen(skb);
680 struct sk_buff *frag2;
682 if (first_len - hlen > mtu ||
683 ((first_len - hlen) & 7) ||
685 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
688 skb_walk_frags(skb, frag) {
689 /* Correct geometry. */
690 if (frag->len > mtu ||
691 ((frag->len & 7) && frag->next) ||
692 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
693 goto slow_path_clean;
695 /* Partially cloned skb? */
696 if (skb_shared(frag))
697 goto slow_path_clean;
702 frag->destructor = sock_wfree;
704 skb->truesize -= frag->truesize;
711 *prevhdr = NEXTHDR_FRAGMENT;
712 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
717 frag = skb_shinfo(skb)->frag_list;
718 skb_frag_list_init(skb);
720 __skb_pull(skb, hlen);
721 fh = __skb_push(skb, sizeof(struct frag_hdr));
722 __skb_push(skb, hlen);
723 skb_reset_network_header(skb);
724 memcpy(skb_network_header(skb), tmp_hdr, hlen);
726 fh->nexthdr = nexthdr;
728 fh->frag_off = htons(IP6_MF);
729 fh->identification = frag_id;
731 first_len = skb_pagelen(skb);
732 skb->data_len = first_len - skb_headlen(skb);
733 skb->len = first_len;
734 ipv6_hdr(skb)->payload_len = htons(first_len -
735 sizeof(struct ipv6hdr));
738 /* Prepare the header of the next fragment
739 * before the previous one is sent. */
741 frag->ip_summed = CHECKSUM_NONE;
742 skb_reset_transport_header(frag);
743 fh = __skb_push(frag, sizeof(struct frag_hdr));
744 __skb_push(frag, hlen);
745 skb_reset_network_header(frag);
746 memcpy(skb_network_header(frag), tmp_hdr,
748 offset += skb->len - hlen - sizeof(struct frag_hdr);
749 fh->nexthdr = nexthdr;
751 fh->frag_off = htons(offset);
753 fh->frag_off |= htons(IP6_MF);
754 fh->identification = frag_id;
755 ipv6_hdr(frag)->payload_len =
757 sizeof(struct ipv6hdr));
758 ip6_copy_metadata(frag, skb);
761 err = output(net, sk, skb);
763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
764 IPSTATS_MIB_FRAGCREATES);
777 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
778 IPSTATS_MIB_FRAGOKS);
782 kfree_skb_list(frag);
784 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
785 IPSTATS_MIB_FRAGFAILS);
789 skb_walk_frags(skb, frag2) {
793 frag2->destructor = NULL;
794 skb->truesize += frag2->truesize;
799 left = skb->len - hlen; /* Space per frame */
800 ptr = hlen; /* Where to start from */
803 * Fragment the datagram.
806 troom = rt->dst.dev->needed_tailroom;
809 * Keep copying data until we run out.
812 u8 *fragnexthdr_offset;
815 /* IF: it doesn't fit, use 'mtu' - the data space left */
818 /* IF: we are not sending up to and including the packet end
819 then align the next start on an eight byte boundary */
824 /* Allocate buffer */
825 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
826 hroom + troom, GFP_ATOMIC);
833 * Set up data on packet
836 ip6_copy_metadata(frag, skb);
837 skb_reserve(frag, hroom);
838 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
839 skb_reset_network_header(frag);
840 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
841 frag->transport_header = (frag->network_header + hlen +
842 sizeof(struct frag_hdr));
845 * Charge the memory for the fragment to any owner
849 skb_set_owner_w(frag, skb->sk);
852 * Copy the packet header into the new buffer.
854 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
856 fragnexthdr_offset = skb_network_header(frag);
857 fragnexthdr_offset += prevhdr - skb_network_header(skb);
858 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
861 * Build fragment header.
863 fh->nexthdr = nexthdr;
865 fh->identification = frag_id;
868 * Copy a block of the IP datagram.
870 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
874 fh->frag_off = htons(offset);
876 fh->frag_off |= htons(IP6_MF);
877 ipv6_hdr(frag)->payload_len = htons(frag->len -
878 sizeof(struct ipv6hdr));
884 * Put this fragment into the sending queue.
886 err = output(net, sk, frag);
890 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
891 IPSTATS_MIB_FRAGCREATES);
893 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
894 IPSTATS_MIB_FRAGOKS);
899 if (skb->sk && dst_allfrag(skb_dst(skb)))
900 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
902 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
906 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
907 IPSTATS_MIB_FRAGFAILS);
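/*
 * Helper for ip6_sk_dst_check(): returns true when the flow address can
 * no longer be matched against the cached route, i.e. the route key is
 * not a matching host route and the cached last-used address differs.
 */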
912 static inline int ip6_rt_check(const struct rt6key *rt_key,
913 const struct in6_addr *fl_addr,
914 const struct in6_addr *addr_cache)
916 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
917 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
920 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
921 struct dst_entry *dst,
922 const struct flowi6 *fl6)
924 struct ipv6_pinfo *np = inet6_sk(sk);
930 if (dst->ops->family != AF_INET6) {
935 rt = (struct rt6_info *)dst;
936 /* Yes, checking route validity in the non-connected
937 * case is not simple. Take into account
938 * that we do not support routing by source, TOS,
939 * and MSG_DONTROUTE --ANK (980726)
941 * 1. ip6_rt_check(): If the route was a host route,
942 * check that the cached destination is current.
943 * If it is a network route, we still may
944 * check its validity using a saved pointer
945 * to the last used address: daddr_cache.
946 * We do not want to save the whole address now
947 * (because the main consumer of this service
948 * is tcp, which does not have this problem),
949 * so the last trick works only on connected
951 * 2. oif also should be the same.
953 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
954 #ifdef CONFIG_IPV6_SUBTREES
955 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
957 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
958 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
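/*
 * Common tail of the dst lookup helpers: fills *dst via ip6_route_output(),
 * picks a source address when the caller left it unspecified, and (with
 * optimistic DAD) may fall back to the default router's dst while our own
 * source address is still optimistic.
 */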
967 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
968 struct dst_entry **dst, struct flowi6 *fl6)
970 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
977 /* The correct way to handle this would be to do
978 * ip6_route_get_saddr, and then ip6_route_output; however,
979 * the route-specific preferred source forces the
980 * ip6_route_output call _before_ ip6_route_get_saddr.
982 * In source specific routing (no src=any default route),
983 * ip6_route_output will fail given src=any saddr, though, so
984 * that's why we try it again later.
986 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
987 struct fib6_info *from;
989 bool had_dst = *dst != NULL;
992 *dst = ip6_route_output(net, sk, fl6);
993 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
996 from = rt ? rcu_dereference(rt->from) : NULL;
997 err = ip6_route_get_saddr(net, from, &fl6->daddr,
998 sk ? inet6_sk(sk)->srcprefs : 0,
1003 goto out_err_release;
1005 /* If we had an erroneous initial result, pretend it
1006 * never existed and let the SA-enabled version take
1009 if (!had_dst && (*dst)->error) {
1014 if (fl6->flowi6_oif)
1015 flags |= RT6_LOOKUP_F_IFACE;
1019 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1021 err = (*dst)->error;
1023 goto out_err_release;
1025 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1027 * Here if the dst entry we've looked up
1028 * has a neighbour entry that is in the INCOMPLETE
1029 * state and the src address from the flow is
1030 * marked as OPTIMISTIC, we release the found
1031 * dst entry and replace it instead with the
1032 * dst entry of the nexthop router
1034 rt = (struct rt6_info *) *dst;
1036 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1037 rt6_nexthop(rt, &fl6->daddr));
1038 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1039 rcu_read_unlock_bh();
1042 struct inet6_ifaddr *ifp;
1043 struct flowi6 fl_gw6;
1046 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1049 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1055 * We need to get the dst entry for the
1056 * default router instead
1059 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1060 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1061 *dst = ip6_route_output(net, sk, &fl_gw6);
1062 err = (*dst)->error;
1064 goto out_err_release;
1068 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1069 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1070 err = -EAFNOSUPPORT;
1071 goto out_err_release;
1080 if (err == -ENETUNREACH)
1081 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1086 * ip6_dst_lookup - perform route lookup on flow
1087 * @sk: socket which provides route info
1088 * @dst: pointer to dst_entry * for result
1089 * @fl6: flow to lookup
1091 * This function performs a route lookup on the given flow.
1093 * It returns zero on success, or a standard errno code on error.
1095 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1099 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1101 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1104 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1105 * @sk: socket which provides route info
1106 * @fl6: flow to lookup
1107 * @final_dst: final destination address for ipsec lookup
1109 * This function performs a route lookup on the given flow.
1111 * It returns a valid dst pointer on success, or a pointer encoded
1114 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1115 const struct in6_addr *final_dst)
1117 struct dst_entry *dst = NULL;
1120 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1122 return ERR_PTR(err);
1124 fl6->daddr = *final_dst;
1126 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1128 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1131 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1132 * @sk: socket which provides the dst cache and route info
1133 * @fl6: flow to lookup
1134 * @final_dst: final destination address for ipsec lookup
1135 * @connected: whether @sk is connected or not
1137 * This function performs a route lookup on the given flow with the
1138 * possibility of using the cached route in the socket if it is valid.
1139 * It will take the socket dst lock when operating on the dst cache.
1140 * As a result, this function can only be used in process context.
1142 * In addition, for a connected socket, cache the dst in the socket
1143 * if the current cache is not valid.
1145 * It returns a valid dst pointer on success, or a pointer encoded
1148 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1149 const struct in6_addr *final_dst,
1152 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1154 dst = ip6_sk_dst_check(sk, dst, fl6);
1158 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1159 if (connected && !IS_ERR(dst))
1160 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1164 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1166 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1169 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1172 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1175 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1178 static void ip6_append_data_mtu(unsigned int *mtu,
1180 unsigned int fragheaderlen,
1181 struct sk_buff *skb,
1182 struct rt6_info *rt,
1183 unsigned int orig_mtu)
1185 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1187 /* first fragment, reserve header_len */
1188 *mtu = orig_mtu - rt->dst.header_len;
1192 * this fragment is not the first; the header
1193 * space is regarded as data space.
1197 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1198 + fragheaderlen - sizeof(struct frag_hdr);
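/*
 * Copy the per-call parameters (duplicated tx options, hop limit, traffic
 * class, path MTU, timestamping flags) into the cork so that several
 * ip6_append_data() calls can share them until the pending frames are
 * pushed or flushed.
 */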
1202 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1203 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1204 struct rt6_info *rt, struct flowi6 *fl6)
1206 struct ipv6_pinfo *np = inet6_sk(sk);
1208 struct ipv6_txoptions *opt = ipc6->opt;
1214 if (WARN_ON(v6_cork->opt))
1217 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1218 if (unlikely(!v6_cork->opt))
1221 v6_cork->opt->tot_len = sizeof(*opt);
1222 v6_cork->opt->opt_flen = opt->opt_flen;
1223 v6_cork->opt->opt_nflen = opt->opt_nflen;
1225 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1227 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1230 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1232 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1235 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1237 if (opt->hopopt && !v6_cork->opt->hopopt)
1240 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1242 if (opt->srcrt && !v6_cork->opt->srcrt)
1245 /* need source address above miyazawa */
1248 cork->base.dst = &rt->dst;
1249 cork->fl.u.ip6 = *fl6;
1250 v6_cork->hop_limit = ipc6->hlimit;
1251 v6_cork->tclass = ipc6->tclass;
1252 if (rt->dst.flags & DST_XFRM_TUNNEL)
1253 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1254 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1256 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1257 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1258 if (np->frag_size < mtu) {
1260 mtu = np->frag_size;
1262 if (mtu < IPV6_MIN_MTU)
1264 cork->base.fragsize = mtu;
1265 cork->base.gso_size = ipc6->gso_size;
1266 cork->base.tx_flags = 0;
1267 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1269 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1270 cork->base.flags |= IPCORK_ALLFRAG;
1271 cork->base.length = 0;
1273 cork->base.transmit_time = ipc6->sockc.transmit_time;
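/*
 * Core of ip6_append_data(): append 'length' bytes obtained through
 * getfrag() to the queue, growing the tail skb in place when possible and
 * otherwise starting a new, fragment-sized skb, so that the final chain
 * can later be fragmented without copying the data again.
 */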
1278 static int __ip6_append_data(struct sock *sk,
1280 struct sk_buff_head *queue,
1281 struct inet_cork *cork,
1282 struct inet6_cork *v6_cork,
1283 struct page_frag *pfrag,
1284 int getfrag(void *from, char *to, int offset,
1285 int len, int odd, struct sk_buff *skb),
1286 void *from, int length, int transhdrlen,
1287 unsigned int flags, struct ipcm6_cookie *ipc6)
1289 struct sk_buff *skb, *skb_prev = NULL;
1290 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1292 int dst_exthdrlen = 0;
1298 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1299 struct ipv6_txoptions *opt = v6_cork->opt;
1300 int csummode = CHECKSUM_NONE;
1301 unsigned int maxnonfragsize, headersize;
1302 unsigned int wmem_alloc_delta = 0;
1305 skb = skb_peek_tail(queue);
1307 exthdrlen = opt ? opt->opt_flen : 0;
1308 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1311 paged = !!cork->gso_size;
1312 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1315 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1316 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1317 tskey = sk->sk_tskey++;
1319 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1321 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1322 (opt ? opt->opt_nflen : 0);
1323 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1324 sizeof(struct frag_hdr);
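/*
 * maxfraglen is the largest skb length (network header included) that
 * still leaves room for the 8-byte fragment header while keeping the
 * per-fragment payload a multiple of 8. Illustrative example: with
 * mtu = 1500 and fragheaderlen = 40, (1500 - 40) & ~7 = 1456, so
 * maxfraglen = 1456 + 40 - 8 = 1488 (1448 payload bytes per fragment).
 */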
1326 headersize = sizeof(struct ipv6hdr) +
1327 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1328 (dst_allfrag(&rt->dst) ?
1329 sizeof(struct frag_hdr) : 0) +
1330 rt->rt6i_nfheader_len;
1332 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1333 * the first fragment
1335 if (headersize + transhdrlen > mtu)
1338 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1339 (sk->sk_protocol == IPPROTO_UDP ||
1340 sk->sk_protocol == IPPROTO_RAW)) {
1341 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1342 sizeof(struct ipv6hdr));
1346 if (ip6_sk_ignore_df(sk))
1347 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1349 maxnonfragsize = mtu;
1351 if (cork->length + length > maxnonfragsize - headersize) {
1353 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1354 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1358 /* CHECKSUM_PARTIAL only with no extension headers and when
1359 * we are not going to fragment
1361 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1362 headersize == sizeof(struct ipv6hdr) &&
1363 length <= mtu - headersize &&
1364 (!(flags & MSG_MORE) || cork->gso_size) &&
1365 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1366 csummode = CHECKSUM_PARTIAL;
1369 * Let's try using as much space as possible.
1370 * Use MTU if total length of the message fits into the MTU.
1371 * Otherwise, we need to reserve fragment header and
1372 * fragment alignment (= 8-15 octets, in total).
1374 * Note that we may need to "move" the data from the tail
1375 * of the buffer to the new fragment when we split
1378 * FIXME: It may be fragmented into multiple chunks
1379 * at once if non-fragmentable extension headers
1384 cork->length += length;
1388 while (length > 0) {
1389 /* Check if the remaining data fits into current packet. */
1390 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1392 copy = maxfraglen - skb->len;
1396 unsigned int datalen;
1397 unsigned int fraglen;
1398 unsigned int fraggap;
1399 unsigned int alloclen, alloc_extra;
1400 unsigned int pagedlen;
1402 /* There's no room in the current skb */
1404 fraggap = skb->len - maxfraglen;
1407 /* update mtu and maxfraglen if necessary */
1408 if (!skb || !skb_prev)
1409 ip6_append_data_mtu(&mtu, &maxfraglen,
1410 fragheaderlen, skb, rt,
1416 * If remaining data exceeds the mtu,
1417 * we know we need more fragment(s).
1419 datalen = length + fraggap;
1421 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1422 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1423 fraglen = datalen + fragheaderlen;
1426 alloc_extra = hh_len;
1427 alloc_extra += dst_exthdrlen;
1428 alloc_extra += rt->dst.trailer_len;
1430 /* We just reserve space for fragment header.
1431 * Note: this may be overallocation if the message
1432 * (without MSG_MORE) fits into the MTU.
1434 alloc_extra += sizeof(struct frag_hdr);
1436 if ((flags & MSG_MORE) &&
1437 !(rt->dst.dev->features&NETIF_F_SG))
1440 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1441 !(rt->dst.dev->features & NETIF_F_SG)))
1444 alloclen = min_t(int, fraglen, MAX_HEADER);
1445 pagedlen = fraglen - alloclen;
1447 alloclen += alloc_extra;
1449 if (datalen != length + fraggap) {
1451 * this is not the last fragment; the trailer
1452 * space is regarded as data space.
1454 datalen += rt->dst.trailer_len;
1457 fraglen = datalen + fragheaderlen;
1459 copy = datalen - transhdrlen - fraggap - pagedlen;
1465 skb = sock_alloc_send_skb(sk, alloclen,
1466 (flags & MSG_DONTWAIT), &err);
1469 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1471 skb = alloc_skb(alloclen,
1479 * Fill in the control structures
1481 skb->protocol = htons(ETH_P_IPV6);
1482 skb->ip_summed = csummode;
1484 /* reserve for fragmentation and ipsec header */
1485 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1488 /* Only the initial fragment is time stamped */
1489 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1491 skb_shinfo(skb)->tskey = tskey;
1495 * Find where to start putting bytes
1497 data = skb_put(skb, fraglen - pagedlen);
1498 skb_set_network_header(skb, exthdrlen);
1499 data += fragheaderlen;
1500 skb->transport_header = (skb->network_header +
1503 skb->csum = skb_copy_and_csum_bits(
1504 skb_prev, maxfraglen,
1505 data + transhdrlen, fraggap, 0);
1506 skb_prev->csum = csum_sub(skb_prev->csum,
1509 pskb_trim_unique(skb_prev, maxfraglen);
1512 getfrag(from, data + transhdrlen, offset,
1513 copy, fraggap, skb) < 0) {
1520 length -= copy + transhdrlen;
1525 if ((flags & MSG_CONFIRM) && !skb_prev)
1526 skb_set_dst_pending_confirm(skb, 1);
1529 * Put the packet on the pending queue
1531 if (!skb->destructor) {
1532 skb->destructor = sock_wfree;
1534 wmem_alloc_delta += skb->truesize;
1536 __skb_queue_tail(queue, skb);
1543 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1544 skb_tailroom(skb) >= copy) {
1548 if (getfrag(from, skb_put(skb, copy),
1549 offset, copy, off, skb) < 0) {
1550 __skb_trim(skb, off);
1555 int i = skb_shinfo(skb)->nr_frags;
1558 if (!sk_page_frag_refill(sk, pfrag))
1561 if (!skb_can_coalesce(skb, i, pfrag->page,
1564 if (i == MAX_SKB_FRAGS)
1567 __skb_fill_page_desc(skb, i, pfrag->page,
1569 skb_shinfo(skb)->nr_frags = ++i;
1570 get_page(pfrag->page);
1572 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1574 page_address(pfrag->page) + pfrag->offset,
1575 offset, copy, skb->len, skb) < 0)
1578 pfrag->offset += copy;
1579 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1581 skb->data_len += copy;
1582 skb->truesize += copy;
1583 wmem_alloc_delta += copy;
1589 if (wmem_alloc_delta)
1590 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1596 cork->length -= length;
1597 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1598 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
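/*
 * Typical (simplified, illustrative) datagram-style usage of this API:
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, transhdrlen,
 *			      &ipc6, &fl6, rt, msg->msg_flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * i.e. data is corked on sk_write_queue until the caller either pushes
 * the pending frames out as one IPv6 packet or flushes them on error.
 */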
1602 int ip6_append_data(struct sock *sk,
1603 int getfrag(void *from, char *to, int offset, int len,
1604 int odd, struct sk_buff *skb),
1605 void *from, int length, int transhdrlen,
1606 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1607 struct rt6_info *rt, unsigned int flags)
1609 struct inet_sock *inet = inet_sk(sk);
1610 struct ipv6_pinfo *np = inet6_sk(sk);
1614 if (flags&MSG_PROBE)
1616 if (skb_queue_empty(&sk->sk_write_queue)) {
1620 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1625 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1626 length += exthdrlen;
1627 transhdrlen += exthdrlen;
1629 fl6 = &inet->cork.fl.u.ip6;
1633 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1634 &np->cork, sk_page_frag(sk), getfrag,
1635 from, length, transhdrlen, flags, ipc6);
1637 EXPORT_SYMBOL_GPL(ip6_append_data);
1639 static void ip6_cork_release(struct inet_cork_full *cork,
1640 struct inet6_cork *v6_cork)
1643 kfree(v6_cork->opt->dst0opt);
1644 kfree(v6_cork->opt->dst1opt);
1645 kfree(v6_cork->opt->hopopt);
1646 kfree(v6_cork->opt->srcrt);
1647 kfree(v6_cork->opt);
1648 v6_cork->opt = NULL;
1651 if (cork->base.dst) {
1652 dst_release(cork->base.dst);
1653 cork->base.dst = NULL;
1654 cork->base.flags &= ~IPCORK_ALLFRAG;
1656 memset(&cork->fl, 0, sizeof(cork->fl));
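/*
 * Collapse the corked queue into a single skb: the first skb becomes the
 * head, the rest are chained on its frag_list, then the extension headers
 * and the IPv6 header are pushed and the cork is released.
 */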
1659 struct sk_buff *__ip6_make_skb(struct sock *sk,
1660 struct sk_buff_head *queue,
1661 struct inet_cork_full *cork,
1662 struct inet6_cork *v6_cork)
1664 struct sk_buff *skb, *tmp_skb;
1665 struct sk_buff **tail_skb;
1666 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1667 struct ipv6_pinfo *np = inet6_sk(sk);
1668 struct net *net = sock_net(sk);
1669 struct ipv6hdr *hdr;
1670 struct ipv6_txoptions *opt = v6_cork->opt;
1671 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1672 struct flowi6 *fl6 = &cork->fl.u.ip6;
1673 unsigned char proto = fl6->flowi6_proto;
1675 skb = __skb_dequeue(queue);
1678 tail_skb = &(skb_shinfo(skb)->frag_list);
1680 /* move skb->data to ip header from ext header */
1681 if (skb->data < skb_network_header(skb))
1682 __skb_pull(skb, skb_network_offset(skb));
1683 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1684 __skb_pull(tmp_skb, skb_network_header_len(skb));
1685 *tail_skb = tmp_skb;
1686 tail_skb = &(tmp_skb->next);
1687 skb->len += tmp_skb->len;
1688 skb->data_len += tmp_skb->len;
1689 skb->truesize += tmp_skb->truesize;
1690 tmp_skb->destructor = NULL;
1694 /* Allow local fragmentation. */
1695 skb->ignore_df = ip6_sk_ignore_df(sk);
1697 *final_dst = fl6->daddr;
1698 __skb_pull(skb, skb_network_header_len(skb));
1699 if (opt && opt->opt_flen)
1700 ipv6_push_frag_opts(skb, opt, &proto);
1701 if (opt && opt->opt_nflen)
1702 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1704 skb_push(skb, sizeof(struct ipv6hdr));
1705 skb_reset_network_header(skb);
1706 hdr = ipv6_hdr(skb);
1708 ip6_flow_hdr(hdr, v6_cork->tclass,
1709 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1710 ip6_autoflowlabel(net, np), fl6));
1711 hdr->hop_limit = v6_cork->hop_limit;
1712 hdr->nexthdr = proto;
1713 hdr->saddr = fl6->saddr;
1714 hdr->daddr = *final_dst;
1716 skb->priority = sk->sk_priority;
1717 skb->mark = sk->sk_mark;
1719 skb->tstamp = cork->base.transmit_time;
1721 skb_dst_set(skb, dst_clone(&rt->dst));
1722 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1723 if (proto == IPPROTO_ICMPV6) {
1724 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1726 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1727 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1730 ip6_cork_release(cork, v6_cork);
1735 int ip6_send_skb(struct sk_buff *skb)
1737 struct net *net = sock_net(skb->sk);
1738 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1741 err = ip6_local_out(net, skb->sk, skb);
1744 err = net_xmit_errno(err);
1746 IP6_INC_STATS(net, rt->rt6i_idev,
1747 IPSTATS_MIB_OUTDISCARDS);
1753 int ip6_push_pending_frames(struct sock *sk)
1755 struct sk_buff *skb;
1757 skb = ip6_finish_skb(sk);
1761 return ip6_send_skb(skb);
1763 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1765 static void __ip6_flush_pending_frames(struct sock *sk,
1766 struct sk_buff_head *queue,
1767 struct inet_cork_full *cork,
1768 struct inet6_cork *v6_cork)
1770 struct sk_buff *skb;
1772 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1774 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1775 IPSTATS_MIB_OUTDISCARDS);
1779 ip6_cork_release(cork, v6_cork);
1782 void ip6_flush_pending_frames(struct sock *sk)
1784 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1785 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1787 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
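/*
 * One-shot variant of the append/push sequence: sets up a private cork
 * and queue, appends the whole payload and returns the finished skb (or
 * an ERR_PTR), without touching sk_write_queue.
 */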
1789 struct sk_buff *ip6_make_skb(struct sock *sk,
1790 int getfrag(void *from, char *to, int offset,
1791 int len, int odd, struct sk_buff *skb),
1792 void *from, int length, int transhdrlen,
1793 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1794 struct rt6_info *rt, unsigned int flags,
1795 struct inet_cork_full *cork)
1797 struct inet6_cork v6_cork;
1798 struct sk_buff_head queue;
1799 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1802 if (flags & MSG_PROBE)
1805 __skb_queue_head_init(&queue);
1807 cork->base.flags = 0;
1808 cork->base.addr = 0;
1809 cork->base.opt = NULL;
1810 cork->base.dst = NULL;
1812 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1814 ip6_cork_release(cork, &v6_cork);
1815 return ERR_PTR(err);
1817 if (ipc6->dontfrag < 0)
1818 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1820 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1821 &current->task_frag, getfrag, from,
1822 length + exthdrlen, transhdrlen + exthdrlen,
1825 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1826 return ERR_PTR(err);
1829 return __ip6_make_skb(sk, &queue, cork, &v6_cork);