2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/bpf-cgroup.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
58 #include <linux/mroute6.h>
59 #include <net/l3mdev.h>
60 #include <net/lwtunnel.h>
/* Final L2 transmit step for locally generated/forwarded IPv6 packets:
 * loop multicast back when required, account multicast output stats,
 * optionally hand off to a lightweight tunnel, then resolve the
 * neighbour for the next hop and queue the skb on the device.
 *
 * NOTE(review): this extraction is missing lines (braces, the
 * rcu_read_lock_bh() matching the unlocks below, several returns and
 * the kfree_skb() on the error path) — compare against the full
 * net/ipv6/ip6_output.c before relying on the control flow shown here.
 */
62 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
64 struct dst_entry *dst = skb_dst(skb);
65 struct net_device *dev = dst->dev;
66 struct neighbour *neigh;
67 struct in6_addr *nexthop;
/* Multicast destination: decide whether to loop a copy back locally. */
70 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
/* Loop back a clone when the socket asked for mc loopback and either
 * a multicast-router socket owns the packet (and it was not already
 * forwarded) or this host is itself a member of the group. */
73 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
74 ((mroute6_is_socket(net, skb) &&
75 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77 &ipv6_hdr(skb)->saddr))) {
78 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
80 /* Do not check for IFF_ALLMULTI; multicast routing
81 is not supported in any case.
/* Deliver the looped-back clone through the POST_ROUTING hook. */
84 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85 net, sk, newskb, NULL, newskb->dev,
/* hop_limit 0 on a multicast packet: discard rather than send. */
88 if (ipv6_hdr(skb)->hop_limit == 0) {
89 IP6_INC_STATS(net, idev,
90 IPSTATS_MIB_OUTDISCARDS);
96 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
/* Node-local scope multicast must never leave the host on a
 * non-loopback device. */
98 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
99 IPV6_ADDR_SCOPE_NODELOCAL &&
100 !(dev->flags & IFF_LOOPBACK)) {
/* Lightweight tunnel output redirect, if configured on the route. */
106 if (lwtunnel_xmit_redirect(dst->lwtstate)) {
107 int res = lwtunnel_xmit(skb);
109 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
/* Resolve (or create) the neighbour entry for the next hop and
 * transmit.  The rcu_read_unlock_bh() calls imply a matching
 * rcu_read_lock_bh() on a line missing from this extraction. */
114 nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
115 neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
116 if (unlikely(!neigh))
117 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
118 if (!IS_ERR(neigh)) {
119 sock_confirm_neigh(skb, neigh);
120 ret = neigh_output(neigh, skb);
121 rcu_read_unlock_bh();
124 rcu_read_unlock_bh();
/* Neighbour creation failed: count as "no route" discard. */
126 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
/* Slow path for GSO packets whose segments would exceed the path MTU:
 * software-segment the skb, then run each resulting segment through
 * ip6_fragment() individually.
 *
 * NOTE(review): return type, error handling after skb_gso_segment()
 * failure, and the loop tail are on lines missing from this extraction.
 */
132 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
133 struct sk_buff *skb, unsigned int mtu)
135 struct sk_buff *segs, *nskb;
136 netdev_features_t features;
139 /* Please see corresponding comment in ip_finish_output_gso
140 * describing the cases where GSO segment length exceeds the
/* Force software segmentation by masking out all GSO features. */
143 features = netif_skb_features(skb);
144 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
145 if (IS_ERR_OR_NULL(segs)) {
/* Fragment each segment separately; unlink it from the list first. */
152 skb_list_walk_safe(segs, segs, nskb) {
155 skb_mark_not_on_list(segs);
156 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
/* Post-routing finish step: run the cgroup BPF egress program, re-route
 * through dst_output() if an XFRM policy was attached after SNAT, and
 * fragment (or GSO-slow-path) packets that exceed the dst MTU before
 * handing off to ip6_finish_output2().
 *
 * NOTE(review): lines are missing from this extraction (function body
 * braces, the BPF return-value check, closing of the #if block).
 */
164 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
169 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
175 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
176 /* Policy lookup after SNAT yielded a new policy */
177 if (skb_dst(skb)->xfrm) {
178 IP6CB(skb)->flags |= IP6SKB_REROUTED;
179 return dst_output(net, sk, skb);
/* GSO packets whose network length exceeds the MTU take the
 * segment-then-fragment slow path. */
183 mtu = ip6_skb_dst_mtu(skb);
184 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
185 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
/* Non-GSO oversize, allfrag routes, or a conntrack-recorded
 * frag_max_size smaller than the packet all require fragmentation. */
187 if ((skb->len > mtu && !skb_is_gso(skb)) ||
188 dst_allfrag(skb_dst(skb)) ||
189 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
190 return ip6_fragment(net, sk, skb, ip6_finish_output2);
192 return ip6_finish_output2(net, sk, skb);
/* Entry point for IPv6 output: drop if IPv6 is administratively
 * disabled on the egress device, otherwise pass through the
 * NF_INET_POST_ROUTING hook (skipped for re-routed skbs) on the way to
 * ip6_finish_output().
 *
 * NOTE(review): the drop path after the OUTDISCARDS bump (kfree_skb /
 * return) is on lines missing from this extraction.
 */
195 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
197 struct net_device *dev = skb_dst(skb)->dev;
198 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
200 skb->protocol = htons(ETH_P_IPV6);
203 if (unlikely(idev->cnf.disable_ipv6)) {
204 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
/* Conditional hook: IP6SKB_REROUTED packets already traversed
 * POST_ROUTING once and go straight to the okfn. */
209 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
210 net, sk, skb, NULL, dev,
212 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
/* Return whether auto flow labels should be used for this socket:
 * the per-socket setting if the application set one, otherwise the
 * net-namespace default. */
215 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
217 if (!np->autoflowlabel_set)
218 return ip6_default_np_autolabel(net);
220 return np->autoflowlabel;
224 * xmit an sk_buff (used by TCP, SCTP and DCCP)
225 * Note : socket lock is not held for SYNACK packets, but might be modified
226 * by calls to skb_set_owner_w() and ipv6_local_error(),
227 * which are using proper atomic operations or spinlocks.
/* Build the IPv6 header (plus any extension headers from @opt) in front
 * of the transport payload and send the packet through the LOCAL_OUT
 * netfilter hook.  If the packet exceeds the path MTU and may not be
 * fragmented, signal EMSGSIZE to the socket instead.
 *
 * NOTE(review): lines are missing from this extraction (local
 * declarations such as hdr/mtu/hlimit, several returns, kfree_skb on
 * the headroom-expansion failure path).
 */
229 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
230 __u32 mark, struct ipv6_txoptions *opt, int tclass)
232 struct net *net = sock_net(sk);
233 const struct ipv6_pinfo *np = inet6_sk(sk);
234 struct in6_addr *first_hop = &fl6->daddr;
235 struct dst_entry *dst = skb_dst(skb);
236 unsigned int head_room;
238 u8 proto = fl6->flowi6_proto;
239 int seg_len = skb->len;
/* Headroom needed: IPv6 header + link-layer reserve + option space. */
243 head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
245 head_room += opt->opt_nflen + opt->opt_flen;
/* Not enough headroom: reallocate; on success transfer skb ownership
 * to the original socket. */
247 if (unlikely(skb_headroom(skb) < head_room)) {
248 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
250 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
251 IPSTATS_MIB_OUTDISCARDS);
256 skb_set_owner_w(skb2, skb->sk);
/* Push extension headers; fragmentable ones first, then the
 * non-fragmentable set which may rewrite the first hop (routing hdr). */
262 seg_len += opt->opt_nflen + opt->opt_flen;
265 ipv6_push_frag_opts(skb, opt, &proto);
268 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
272 skb_push(skb, sizeof(struct ipv6hdr));
273 skb_reset_network_header(skb);
277 * Fill in the IPv6 header
/* Hop limit: per-socket value if set, else derived from the route. */
280 hlimit = np->hop_limit;
282 hlimit = ip6_dst_hoplimit(dst);
284 ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
285 ip6_autoflowlabel(net, np), fl6));
287 hdr->payload_len = htons(seg_len);
288 hdr->nexthdr = proto;
289 hdr->hop_limit = hlimit;
291 hdr->saddr = fl6->saddr;
292 hdr->daddr = *first_hop;
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->priority = sk->sk_priority;
/* Fits in the MTU (or DF is ignored / GSO handles it): send. */
299 if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
300 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
301 IPSTATS_MIB_OUT, skb->len);
303 /* if egress device is enslaved to an L3 master device pass the
304 * skb to its handler for processing
306 skb = l3mdev_ip6_out((struct sock *)sk, skb);
310 /* hooks should never assume socket lock is held.
311 * we promote our socket to non const
313 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
314 net, (struct sock *)sk, skb, NULL, dst->dev,
/* Too big and not allowed to fragment: report EMSGSIZE to the owner. */
319 /* ipv6_local_error() does not require socket lock,
320 * we promote our socket to non const
322 ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
324 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
328 EXPORT_SYMBOL(ip6_xmit);
/* Deliver a Router Alert packet to every raw socket registered on the
 * ip6_ra_chain with a matching selector (and matching bound device, if
 * any).  Each matching socket except the last gets a clone; the last
 * one consumes the original skb.
 *
 * NOTE(review): the `last = sk` bookkeeping and the return values are
 * on lines missing from this extraction.
 */
330 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
332 struct ip6_ra_chain *ra;
333 struct sock *last = NULL;
335 read_lock(&ip6_ra_lock);
336 for (ra = ip6_ra_chain; ra; ra = ra->next) {
337 struct sock *sk = ra->sk;
338 if (sk && ra->sel == sel &&
339 (!sk->sk_bound_dev_if ||
340 sk->sk_bound_dev_if == skb->dev->ifindex)) {
/* Previous match gets a clone so the original can go to the
 * next matching socket. */
342 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
344 rawv6_rcv(last, skb2);
/* Final matching socket receives the original skb. */
351 rawv6_rcv(last, skb);
352 read_unlock(&ip6_ra_lock);
355 read_unlock(&ip6_ra_lock);
/* Decide how a packet destined to a proxied (pneigh) address should be
 * handled: unicast neighbour-discovery ICMPv6 messages are passed up
 * for local processing, link-local destinations are rejected, and the
 * rest may be forwarded.
 *
 * NOTE(review): return statements and the offset<0 error check are on
 * lines missing from this extraction.
 */
359 static int ip6_forward_proxy_check(struct sk_buff *skb)
361 struct ipv6hdr *hdr = ipv6_hdr(skb);
362 u8 nexthdr = hdr->nexthdr;
/* Skip any extension headers to find the transport protocol. */
366 if (ipv6_ext_hdr(nexthdr)) {
367 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
371 offset = sizeof(struct ipv6hdr);
373 if (nexthdr == IPPROTO_ICMPV6) {
374 struct icmp6hdr *icmp6;
/* Need at least the icmp6_type byte in linear data. */
376 if (!pskb_may_pull(skb, (skb_network_header(skb) +
377 offset + 1 - skb->data)))
380 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
382 switch (icmp6->icmp6_type) {
383 case NDISC_ROUTER_SOLICITATION:
384 case NDISC_ROUTER_ADVERTISEMENT:
385 case NDISC_NEIGHBOUR_SOLICITATION:
386 case NDISC_NEIGHBOUR_ADVERTISEMENT:
388 /* For reaction involving unicast neighbor discovery
389 * message destined to the proxied address, pass it to
399 * The proxying router can't forward traffic sent to a link-local
400 * address, so signal the sender and discard the packet. This
401 * behavior is clarified by the MIPv6 specification.
403 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
404 dst_link_failure(skb);
/* Final step of forwarding: account OUTFORWDATAGRAMS/OUTOCTETS and
 * hand the packet to dst_output() for transmission. */
411 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
414 struct dst_entry *dst = skb_dst(skb);
416 __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
417 __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
420 return dst_output(net, sk, skb);
/* Return whether a forwarded packet exceeds @mtu and must trigger a
 * Packet Too Big response.  A conntrack-defragmented packet is judged
 * by its recorded frag_max_size; a GSO packet whose segments fit is not
 * too big.
 *
 * NOTE(review): the early size check and return statements are on lines
 * missing from this extraction.
 */
423 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
428 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
429 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
435 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
/* Forward an IPv6 packet received on one interface toward its next hop:
 * validate forwarding is enabled, handle Router Alert and proxy-ND
 * special cases, check and decrement the hop limit, emit redirects when
 * appropriate, enforce the path MTU, then pass through NF_INET_FORWARD.
 *
 * NOTE(review): many lines are missing from this extraction (goto
 * labels such as drop/error, hop-limit decrement, kfree_skb calls,
 * return statements) — the labels referenced implicitly here live on
 * missing lines.
 */
441 int ip6_forward(struct sk_buff *skb)
443 struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
444 struct dst_entry *dst = skb_dst(skb);
445 struct ipv6hdr *hdr = ipv6_hdr(skb);
446 struct inet6_skb_parm *opt = IP6CB(skb);
447 struct net *net = dev_net(dst->dev);
/* Forwarding disabled for the namespace: do not forward. */
450 if (net->ipv6.devconf_all->forwarding == 0)
453 if (skb->pkt_type != PACKET_HOST)
456 if (unlikely(skb->sk))
459 if (skb_warn_if_lro(skb))
/* XFRM forward policy check, unless policy checks are disabled both
 * globally and on the ingress device. */
462 if (!net->ipv6.devconf_all->disable_policy &&
463 (!idev || !idev->cnf.disable_policy) &&
464 !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
465 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
469 skb_forward_csum(skb);
472 * We DO NOT make any processing on
473 * RA packets, pushing them to user level AS IS
474 * without any warranty that the application will be able
475 * to interpret them. The reason is that we
476 * cannot make anything clever here.
478 * We are not end-node, so that if packet contains
479 * AH/ESP, we cannot make anything.
480 * Defragmentation also would be mistake, RA packets
481 * cannot be fragmented, because there is no warranty
482 * that different fragments will go along one path. --ANK
484 if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
485 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
490 * check and decrement ttl
492 if (hdr->hop_limit <= 1) {
493 /* Force OUTPUT device used as source address */
495 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
496 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
502 /* XXX: idev->cnf.proxy_ndp? */
/* Proxy ND: packets for proxied addresses may be taken locally. */
503 if (net->ipv6.devconf_all->proxy_ndp &&
504 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
505 int proxied = ip6_forward_proxy_check(skb);
507 return ip6_input(skb);
508 else if (proxied < 0) {
509 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
514 if (!xfrm6_route_forward(skb)) {
515 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
520 /* IPv6 specs say nothing about it, but it is clear that we cannot
521 send redirects to source routed frames.
522 We don't send redirects to frames decapsulated from IPsec.
524 if (IP6CB(skb)->iif == dst->dev->ifindex &&
525 opt->srcrt == 0 && !skb_sec_path(skb)) {
526 struct in6_addr *target = NULL;
527 struct inet_peer *peer;
531 * incoming and outgoing devices are the same
/* Redirect target: the gateway for gatewayed routes, else the
 * final destination itself. */
535 rt = (struct rt6_info *) dst;
536 if (rt->rt6i_flags & RTF_GATEWAY)
537 target = &rt->rt6i_gateway;
539 target = &hdr->daddr;
541 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
543 /* Limit redirects both by destination (here)
544 and by source (inside ndisc_send_redirect)
546 if (inet_peer_xrlim_allow(peer, 1*HZ))
547 ndisc_send_redirect(skb, target);
551 int addrtype = ipv6_addr_type(&hdr->saddr);
553 /* This check is security critical. */
554 if (addrtype == IPV6_ADDR_ANY ||
555 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
557 if (addrtype & IPV6_ADDR_LINKLOCAL) {
558 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
559 ICMPV6_NOT_NEIGHBOUR, 0);
/* Enforce at least the IPv6 minimum MTU on the forward path. */
564 mtu = ip6_dst_mtu_forward(dst);
565 if (mtu < IPV6_MIN_MTU)
568 if (ip6_pkt_too_big(skb, mtu)) {
569 /* Again, force OUTPUT device used as source address */
571 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
572 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
573 __IP6_INC_STATS(net, ip6_dst_idev(dst),
574 IPSTATS_MIB_FRAGFAILS);
/* Ensure a private, writable header before mangling hop_limit. */
579 if (skb_cow(skb, dst->dev->hard_header_len)) {
580 __IP6_INC_STATS(net, ip6_dst_idev(dst),
581 IPSTATS_MIB_OUTDISCARDS);
587 /* Mangling hops number delayed to point after skb COW */
591 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
592 net, NULL, skb, skb->dev, dst->dev,
596 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
/* Copy per-packet metadata (type, priority, protocol, dst reference,
 * mark, hash, tc index, secmark) from @from to @to — used when building
 * fragments so each fragment carries the original packet's metadata. */
602 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
604 to->pkt_type = from->pkt_type;
605 to->priority = from->priority;
606 to->protocol = from->protocol;
/* Each fragment needs its own reference on the shared dst. */
608 skb_dst_set(to, dst_clone(skb_dst(from)));
610 to->mark = from->mark;
612 skb_copy_hash(to, from);
614 #ifdef CONFIG_NET_SCHED
615 to->tc_index = from->tc_index;
618 skb_copy_secmark(to, from);
/* Fragment @skb to fit the path MTU and emit each fragment via
 * @output.  Two strategies: a fast path that reuses an existing frag
 * list when its geometry already matches, and a slow path that
 * allocates and copies each fragment.
 *
 * NOTE(review): a large number of lines are missing from this
 * extraction (error labels, loop heads such as `for (;;)`, frag-list
 * advancement, hlen assignment from ip6_find_1stfragopt, several
 * returns) — treat the visible control flow as incomplete.
 */
621 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
622 int (*output)(struct net *, struct sock *, struct sk_buff *))
624 struct sk_buff *frag;
625 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
626 struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
627 inet6_sk(skb->sk) : NULL;
628 struct ipv6hdr *tmp_hdr;
630 unsigned int mtu, hlen, left, len, nexthdr_offset;
633 int ptr, offset = 0, err = 0;
634 u8 *prevhdr, nexthdr = 0;
/* Locate the header whose nexthdr field must become FRAGMENT. */
636 err = ip6_find_1stfragopt(skb, &prevhdr);
/* Remember prevhdr as an offset — later pulls/pushes may move data. */
641 nexthdr_offset = prevhdr - skb_network_header(skb);
643 mtu = ip6_skb_dst_mtu(skb);
645 /* We must not fragment if the socket is set to force MTU discovery
646 * or if the skb is not generated by a local socket.
648 if (unlikely(!skb->ignore_df && skb->len > mtu))
651 if (IP6CB(skb)->frag_max_size) {
652 if (IP6CB(skb)->frag_max_size > mtu)
655 /* don't send fragments larger than what we received */
656 mtu = IP6CB(skb)->frag_max_size;
657 if (mtu < IPV6_MIN_MTU)
/* Honor an application-requested fragment size if smaller. */
661 if (np && np->frag_size < mtu) {
665 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
/* mtu now means payload bytes available per fragment. */
667 mtu -= hlen + sizeof(struct frag_hdr);
669 frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
670 &ipv6_hdr(skb)->saddr);
/* Checksum must be finalized before the payload is split. */
672 if (skb->ip_summed == CHECKSUM_PARTIAL &&
673 (err = skb_checksum_help(skb)))
/* Re-derive prevhdr: skb_checksum_help() may have reallocated. */
676 prevhdr = skb_network_header(skb) + nexthdr_offset;
677 hroom = LL_RESERVED_SPACE(rt->dst.dev);
/* --- Fast path: existing frag list with correct geometry. --- */
678 if (skb_has_frag_list(skb)) {
679 unsigned int first_len = skb_pagelen(skb);
680 struct sk_buff *frag2;
682 if (first_len - hlen > mtu ||
683 ((first_len - hlen) & 7) ||
685 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
688 skb_walk_frags(skb, frag) {
689 /* Correct geometry. */
690 if (frag->len > mtu ||
691 ((frag->len & 7) && frag->next) ||
692 skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
693 goto slow_path_clean;
695 /* Partially cloned skb? */
696 if (skb_shared(frag))
697 goto slow_path_clean;
/* Take over wmem accounting for each list member. */
702 frag->destructor = sock_wfree;
704 skb->truesize -= frag->truesize;
/* Build the first fragment in place: duplicate the header block,
 * splice a fragment header in after it. */
711 *prevhdr = NEXTHDR_FRAGMENT;
712 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
717 frag = skb_shinfo(skb)->frag_list;
718 skb_frag_list_init(skb);
720 __skb_pull(skb, hlen);
721 fh = __skb_push(skb, sizeof(struct frag_hdr));
722 __skb_push(skb, hlen);
723 skb_reset_network_header(skb);
724 memcpy(skb_network_header(skb), tmp_hdr, hlen);
726 fh->nexthdr = nexthdr;
728 fh->frag_off = htons(IP6_MF);
729 fh->identification = frag_id;
731 first_len = skb_pagelen(skb);
732 skb->data_len = first_len - skb_headlen(skb);
733 skb->len = first_len;
734 ipv6_hdr(skb)->payload_len = htons(first_len -
735 sizeof(struct ipv6hdr));
738 /* Prepare header of the next frame,
739 * before previous one went down. */
741 frag->ip_summed = CHECKSUM_NONE;
742 skb_reset_transport_header(frag);
743 fh = __skb_push(frag, sizeof(struct frag_hdr));
744 __skb_push(frag, hlen);
745 skb_reset_network_header(frag);
746 memcpy(skb_network_header(frag), tmp_hdr,
748 offset += skb->len - hlen - sizeof(struct frag_hdr);
749 fh->nexthdr = nexthdr;
751 fh->frag_off = htons(offset);
/* MF set on every fragment except the last. */
753 fh->frag_off |= htons(IP6_MF);
754 fh->identification = frag_id;
755 ipv6_hdr(frag)->payload_len =
757 sizeof(struct ipv6hdr));
758 ip6_copy_metadata(frag, skb);
761 err = output(net, sk, skb);
763 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
764 IPSTATS_MIB_FRAGCREATES);
777 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
778 IPSTATS_MIB_FRAGOKS);
/* Fast-path failure: free remaining fragments, count the failure. */
782 kfree_skb_list(frag);
784 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
785 IPSTATS_MIB_FRAGFAILS);
/* slow_path_clean: undo the wmem-accounting takeover done above. */
789 skb_walk_frags(skb, frag2) {
793 frag2->destructor = NULL;
794 skb->truesize += frag2->truesize;
/* --- Slow path: allocate and copy each fragment. --- */
799 left = skb->len - hlen; /* Space per frame */
800 ptr = hlen; /* Where to start from */
803 * Fragment the datagram.
806 troom = rt->dst.dev->needed_tailroom;
809 * Keep copying data until we run out.
812 u8 *fragnexthdr_offset;
815 /* IF: it doesn't fit, use 'mtu' - the data space left */
818 /* IF: we are not sending up to and including the packet end
819 then align the next start on an eight byte boundary */
824 /* Allocate buffer */
825 frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
826 hroom + troom, GFP_ATOMIC);
833 * Set up data on packet
836 ip6_copy_metadata(frag, skb);
837 skb_reserve(frag, hroom);
838 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
839 skb_reset_network_header(frag);
840 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
841 frag->transport_header = (frag->network_header + hlen +
842 sizeof(struct frag_hdr));
845 * Charge the memory for the fragment to any owner
849 skb_set_owner_w(frag, skb->sk);
852 * Copy the packet header into the new buffer.
854 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
/* Patch the copied header chain to insert NEXTHDR_FRAGMENT. */
856 fragnexthdr_offset = skb_network_header(frag);
857 fragnexthdr_offset += prevhdr - skb_network_header(skb);
858 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
861 * Build fragment header.
863 fh->nexthdr = nexthdr;
865 fh->identification = frag_id;
868 * Copy a block of the IP datagram.
870 BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
874 fh->frag_off = htons(offset);
876 fh->frag_off |= htons(IP6_MF);
877 ipv6_hdr(frag)->payload_len = htons(frag->len -
878 sizeof(struct ipv6hdr));
884 * Put this fragment into the sending queue.
886 err = output(net, sk, frag);
890 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
891 IPSTATS_MIB_FRAGCREATES);
893 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
894 IPSTATS_MIB_FRAGOKS);
/* Cannot fragment: disable GSO on the socket for allfrag dsts and
 * send Packet Too Big back to the origin. */
899 if (skb->sk && dst_allfrag(skb_dst(skb)))
900 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
902 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
906 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
907 IPSTATS_MIB_FRAGFAILS);
/* Return nonzero when a cached route may be stale for @fl_addr: true
 * unless the route is a host route (/128) for exactly that address, or
 * @addr_cache records that the route was last used for it. */
912 static inline int ip6_rt_check(const struct rt6key *rt_key,
913 const struct in6_addr *fl_addr,
914 const struct in6_addr *addr_cache)
916 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
917 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
/* Validate a socket's cached dst against the flow: release it when the
 * family is wrong, the destination/source no longer matches (per
 * ip6_rt_check), or the output interface differs.
 *
 * NOTE(review): the early NULL check, dst_release() calls and return
 * statements are on lines missing from this extraction.
 */
920 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
921 struct dst_entry *dst,
922 const struct flowi6 *fl6)
924 struct ipv6_pinfo *np = inet6_sk(sk);
930 if (dst->ops->family != AF_INET6) {
935 rt = (struct rt6_info *)dst;
936 /* Yes, checking route validity in not connected
937 * case is not very simple. Take into account,
938 * that we do not support routing by source, TOS,
939 * and MSG_DONTROUTE --ANK (980726)
941 * 1. ip6_rt_check(): If route was host route,
942 * check that cached destination is current.
943 * If it is network route, we still may
944 * check its validity using saved pointer
945 * to the last used address: daddr_cache.
946 * We do not want to save whole address now,
947 * (because main consumer of this service
948 * is tcp, which does not have this problem),
949 * so that the last trick works only on connected
951 * 2. oif also should be the same.
953 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
954 #ifdef CONFIG_IPV6_SUBTREES
955 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
957 (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
958 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
/* Core of the IPv6 route lookup: pick a source address when the flow
 * has none, perform the route lookup, optionally redirect to the
 * default router when the chosen source address is still OPTIMISTIC
 * (DAD in progress), and reject v4-mapped source with non-v4-mapped
 * destination.
 *
 * NOTE(review): lines are missing from this extraction (local
 * declarations, rcu lock/unlock around the rt->from dereference, the
 * dst_release on retry, and several returns/labels such as
 * out_err_release) — verify flow against the full source.
 */
967 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
968 struct dst_entry **dst, struct flowi6 *fl6)
970 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
977 /* The correct way to handle this would be to do
978 * ip6_route_get_saddr, and then ip6_route_output; however,
979 * the route-specific preferred source forces the
980 * ip6_route_output call _before_ ip6_route_get_saddr.
982 * In source specific routing (no src=any default route),
983 * ip6_route_output will fail given src=any saddr, though, so
984 * that's why we try it again later.
986 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
987 struct fib6_info *from;
989 bool had_dst = *dst != NULL;
992 *dst = ip6_route_output(net, sk, fl6);
993 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
/* Derive the preferred source address from the route. */
996 from = rt ? rcu_dereference(rt->from) : NULL;
997 err = ip6_route_get_saddr(net, from, &fl6->daddr,
998 sk ? inet6_sk(sk)->srcprefs : 0,
1003 goto out_err_release;
1005 /* If we had an erroneous initial result, pretend it
1006 * never existed and let the SA-enabled version take
1009 if (!had_dst && (*dst)->error) {
1014 if (fl6->flowi6_oif)
1015 flags |= RT6_LOOKUP_F_IFACE;
/* Retry the lookup now that saddr is populated. */
1019 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1021 err = (*dst)->error;
1023 goto out_err_release;
1025 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1027 * Here if the dst entry we've looked up
1028 * has a neighbour entry that is in the INCOMPLETE
1029 * state and the src address from the flow is
1030 * marked as OPTIMISTIC, we release the found
1031 * dst entry and replace it instead with the
1032 * dst entry of the nexthop router
1034 rt = (struct rt6_info *) *dst;
1036 n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1037 rt6_nexthop(rt, &fl6->daddr));
1038 err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1039 rcu_read_unlock_bh();
1042 struct inet6_ifaddr *ifp;
1043 struct flowi6 fl_gw6;
1046 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1049 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1055 * We need to get the dst entry for the
1056 * default router instead
1059 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1060 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1061 *dst = ip6_route_output(net, sk, &fl_gw6);
1062 err = (*dst)->error;
1064 goto out_err_release;
/* v4-mapped saddr requires a v4-mapped (or unspecified) daddr. */
1068 if (ipv6_addr_v4mapped(&fl6->saddr) &&
1069 !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1070 err = -EAFNOSUPPORT;
1071 goto out_err_release;
1080 if (err == -ENETUNREACH)
1081 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1086 * ip6_dst_lookup - perform route lookup on flow
1087 * @sk: socket which provides route info
1088 * @dst: pointer to dst_entry * for result
1089 * @fl6: flow to lookup
1091 * This function performs a route lookup on the given flow.
1093 * It returns zero on success, or a standard errno code on error.
/* Thin public wrapper around ip6_dst_lookup_tail(). */
1095 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1099 return ip6_dst_lookup_tail(net, sk, dst, fl6);
1101 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1104 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1105 * @sk: socket which provides route info
1106 * @fl6: flow to lookup
1107 * @final_dst: final destination address for ipsec lookup
1109 * This function performs a route lookup on the given flow.
1111 * It returns a valid dst pointer on success, or a pointer encoded
/* Route lookup followed by an XFRM (IPsec) policy/bundle lookup.
 * On error the errno from the plain lookup is returned as ERR_PTR. */
1114 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1115 const struct in6_addr *final_dst)
1117 struct dst_entry *dst = NULL;
1120 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1122 return ERR_PTR(err);
/* Rewrite daddr to the post-transformation final destination. */
1124 fl6->daddr = *final_dst;
1126 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1128 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1131 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1132 * @sk: socket which provides the dst cache and route info
1133 * @fl6: flow to lookup
1134 * @final_dst: final destination address for ipsec lookup
1135 * @connected: whether @sk is connected or not
1137 * This function performs a route lookup on the given flow with the
1138 * possibility of using the cached route in the socket if it is valid.
1139 * It will take the socket dst lock when operating on the dst cache.
1140 * As a result, this function can only be used in process context.
1142 * In addition, for a connected socket, cache the dst in the socket
1143 * if the current cache is not valid.
1145 * It returns a valid dst pointer on success, or a pointer encoded
1148 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1149 const struct in6_addr *final_dst,
/* Try the socket's cached dst first; fall back to a full lookup. */
1152 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1154 dst = ip6_sk_dst_check(sk, dst, fl6);
1158 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
/* Connected sockets cache the freshly looked-up dst for reuse. */
1159 if (connected && !IS_ERR(dst))
1160 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1164 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
/* Duplicate an extension-header option block; hdrlen is in units of 8
 * octets not counting the first 8, hence (hdrlen + 1) * 8 total bytes.
 * Returns NULL for a NULL @src or on allocation failure. */
1166 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1169 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/* Duplicate a routing header; same length convention as ip6_opt_dup().
 * Returns NULL for a NULL @src or on allocation failure. */
1172 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1175 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
/* Recompute *mtu (and the derived *maxfraglen) while appending data on
 * a non-XFRM-tunnel route: the first fragment reserves the dst's
 * header_len, later fragments treat that space as data.
 *
 * NOTE(review): the maxfraglen parameter declaration and the
 * else-branch mtu assignment are on lines missing from this extraction.
 */
1178 static void ip6_append_data_mtu(unsigned int *mtu,
1180 unsigned int fragheaderlen,
1181 struct sk_buff *skb,
1182 struct rt6_info *rt,
1183 unsigned int orig_mtu)
1185 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1187 /* first fragment, reserve header_len */
1188 *mtu = orig_mtu - rt->dst.header_len;
1192 * this fragment is not first, the headers
1193 * space is regarded as data space.
/* Fragment payloads are 8-byte aligned; subtract the frag header. */
1197 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1198 + fragheaderlen - sizeof(struct frag_hdr);
/* Initialize cork state for ip6_append_data(): deep-copy the tx options
 * (they can change under us while corked), record the dst/flow, and
 * settle the MTU from pmtudisc mode, route, and per-socket frag_size.
 *
 * NOTE(review): error returns (-ENOBUFS etc.), the dst_hold, and the
 * final return are on lines missing from this extraction.
 */
1202 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1203 struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1204 struct rt6_info *rt, struct flowi6 *fl6)
1206 struct ipv6_pinfo *np = inet6_sk(sk);
1208 struct ipv6_txoptions *opt = ipc6->opt;
/* Options must not already be corked. */
1214 if (WARN_ON(v6_cork->opt))
1217 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1218 if (unlikely(!v6_cork->opt))
1221 v6_cork->opt->tot_len = sizeof(*opt);
1222 v6_cork->opt->opt_flen = opt->opt_flen;
1223 v6_cork->opt->opt_nflen = opt->opt_nflen;
/* Deep-copy each option block; a NULL copy with a non-NULL source
 * means allocation failed. */
1225 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1227 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1230 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1232 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1235 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1237 if (opt->hopopt && !v6_cork->opt->hopopt)
1240 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1242 if (opt->srcrt && !v6_cork->opt->srcrt)
1245 /* need source address above miyazawa*/
1248 cork->base.dst = &rt->dst;
1249 cork->fl.u.ip6 = *fl6;
1250 v6_cork->hop_limit = ipc6->hlimit;
1251 v6_cork->tclass = ipc6->tclass;
/* MTU: device MTU when probing PMTU, else the (possibly tunneled)
 * path MTU of the route. */
1252 if (rt->dst.flags & DST_XFRM_TUNNEL)
1253 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1254 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1256 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1257 READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1258 if (np->frag_size < mtu) {
1260 mtu = np->frag_size;
1262 cork->base.fragsize = mtu;
1263 cork->base.gso_size = ipc6->gso_size;
1264 cork->base.tx_flags = 0;
1265 sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1267 if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1268 cork->base.flags |= IPCORK_ALLFRAG;
1269 cork->base.length = 0;
1271 cork->base.transmit_time = ipc6->sockc.transmit_time;
1276 static int __ip6_append_data(struct sock *sk,
1278 struct sk_buff_head *queue,
1279 struct inet_cork *cork,
1280 struct inet6_cork *v6_cork,
1281 struct page_frag *pfrag,
1282 int getfrag(void *from, char *to, int offset,
1283 int len, int odd, struct sk_buff *skb),
1284 void *from, int length, int transhdrlen,
1285 unsigned int flags, struct ipcm6_cookie *ipc6)
1287 struct sk_buff *skb, *skb_prev = NULL;
1288 unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1290 int dst_exthdrlen = 0;
1296 struct rt6_info *rt = (struct rt6_info *)cork->dst;
1297 struct ipv6_txoptions *opt = v6_cork->opt;
1298 int csummode = CHECKSUM_NONE;
1299 unsigned int maxnonfragsize, headersize;
1300 unsigned int wmem_alloc_delta = 0;
1303 skb = skb_peek_tail(queue);
1305 exthdrlen = opt ? opt->opt_flen : 0;
1306 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1309 paged = !!cork->gso_size;
1310 mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1313 if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1314 sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1315 tskey = sk->sk_tskey++;
1317 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1319 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1320 (opt ? opt->opt_nflen : 0);
1322 headersize = sizeof(struct ipv6hdr) +
1323 (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1324 (dst_allfrag(&rt->dst) ?
1325 sizeof(struct frag_hdr) : 0) +
1326 rt->rt6i_nfheader_len;
1328 if (mtu <= fragheaderlen ||
1329 ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
1332 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1333 sizeof(struct frag_hdr);
1335 /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1336 * the first fragment
1338 if (headersize + transhdrlen > mtu)
1341 if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1342 (sk->sk_protocol == IPPROTO_UDP ||
1343 sk->sk_protocol == IPPROTO_RAW)) {
1344 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1345 sizeof(struct ipv6hdr));
1349 if (ip6_sk_ignore_df(sk))
1350 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1352 maxnonfragsize = mtu;
1354 if (cork->length + length > maxnonfragsize - headersize) {
1356 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1357 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1361 /* CHECKSUM_PARTIAL only with no extension headers and when
1362 * we are not going to fragment
1364 if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1365 headersize == sizeof(struct ipv6hdr) &&
1366 length <= mtu - headersize &&
1367 (!(flags & MSG_MORE) || cork->gso_size) &&
1368 rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1369 csummode = CHECKSUM_PARTIAL;
1372 * Let's try using as much space as possible.
1373 * Use MTU if total length of the message fits into the MTU.
1374 * Otherwise, we need to reserve fragment header and
1375 * fragment alignment (= 8-15 octects, in total).
1377 * Note that we may need to "move" the data from the tail of
1378 * of the buffer to the new fragment when we split
1381 * FIXME: It may be fragmented into multiple chunks
1382 * at once if non-fragmentable extension headers
1387 cork->length += length;
1391 while (length > 0) {
1392 /* Check if the remaining data fits into current packet. */
1393 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1395 copy = maxfraglen - skb->len;
1399 unsigned int datalen;
1400 unsigned int fraglen;
1401 unsigned int fraggap;
1402 unsigned int alloclen, alloc_extra;
1403 unsigned int pagedlen;
1405 /* There's no room in the current skb */
1407 fraggap = skb->len - maxfraglen;
1410 /* update mtu and maxfraglen if necessary */
1411 if (!skb || !skb_prev)
1412 ip6_append_data_mtu(&mtu, &maxfraglen,
1413 fragheaderlen, skb, rt,
1419 * If remaining data exceeds the mtu,
1420 * we know we need more fragment(s).
1422 datalen = length + fraggap;
1424 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1425 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1426 fraglen = datalen + fragheaderlen;
1429 alloc_extra = hh_len;
1430 alloc_extra += dst_exthdrlen;
1431 alloc_extra += rt->dst.trailer_len;
1433 /* We just reserve space for fragment header.
1434 * Note: this may be overallocation if the message
1435 * (without MSG_MORE) fits into the MTU.
1437 alloc_extra += sizeof(struct frag_hdr);
1439 if ((flags & MSG_MORE) &&
1440 !(rt->dst.dev->features&NETIF_F_SG))
1443 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1444 !(rt->dst.dev->features & NETIF_F_SG)))
1447 alloclen = min_t(int, fraglen, MAX_HEADER);
1448 pagedlen = fraglen - alloclen;
1450 alloclen += alloc_extra;
1452 if (datalen != length + fraggap) {
1454 * this is not the last fragment, the trailer
1455 * space is regarded as data space.
1457 datalen += rt->dst.trailer_len;
1460 fraglen = datalen + fragheaderlen;
1462 copy = datalen - transhdrlen - fraggap - pagedlen;
1468 skb = sock_alloc_send_skb(sk, alloclen,
1469 (flags & MSG_DONTWAIT), &err);
1472 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1474 skb = alloc_skb(alloclen,
1482 * Fill in the control structures
1484 skb->protocol = htons(ETH_P_IPV6);
1485 skb->ip_summed = csummode;
1487 /* reserve for fragmentation and ipsec header */
1488 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1491 /* Only the initial fragment is time stamped */
1492 skb_shinfo(skb)->tx_flags = cork->tx_flags;
1494 skb_shinfo(skb)->tskey = tskey;
1498 * Find where to start putting bytes
1500 data = skb_put(skb, fraglen - pagedlen);
1501 skb_set_network_header(skb, exthdrlen);
1502 data += fragheaderlen;
1503 skb->transport_header = (skb->network_header +
1506 skb->csum = skb_copy_and_csum_bits(
1507 skb_prev, maxfraglen,
1508 data + transhdrlen, fraggap, 0);
1509 skb_prev->csum = csum_sub(skb_prev->csum,
1512 pskb_trim_unique(skb_prev, maxfraglen);
1515 getfrag(from, data + transhdrlen, offset,
1516 copy, fraggap, skb) < 0) {
1523 length -= copy + transhdrlen;
1528 if ((flags & MSG_CONFIRM) && !skb_prev)
1529 skb_set_dst_pending_confirm(skb, 1);
1532 * Put the packet on the pending queue
1534 if (!skb->destructor) {
1535 skb->destructor = sock_wfree;
1537 wmem_alloc_delta += skb->truesize;
1539 __skb_queue_tail(queue, skb);
1546 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1547 skb_tailroom(skb) >= copy) {
1551 if (getfrag(from, skb_put(skb, copy),
1552 offset, copy, off, skb) < 0) {
1553 __skb_trim(skb, off);
1558 int i = skb_shinfo(skb)->nr_frags;
1561 if (!sk_page_frag_refill(sk, pfrag))
1564 if (!skb_can_coalesce(skb, i, pfrag->page,
1567 if (i == MAX_SKB_FRAGS)
1570 __skb_fill_page_desc(skb, i, pfrag->page,
1572 skb_shinfo(skb)->nr_frags = ++i;
1573 get_page(pfrag->page);
1575 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1577 page_address(pfrag->page) + pfrag->offset,
1578 offset, copy, skb->len, skb) < 0)
1581 pfrag->offset += copy;
1582 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1584 skb->data_len += copy;
1585 skb->truesize += copy;
1586 wmem_alloc_delta += copy;
1592 if (wmem_alloc_delta)
1593 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1599 cork->length -= length;
1600 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1601 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
/*
 * ip6_append_data - queue user data on the socket's pending write queue.
 *
 * On the first call in a corked sequence (write queue empty) the cork
 * state is initialised via ip6_setup_cork() and the destination-option
 * length (ipc6->opt->opt_flen) is folded into both @length and
 * @transhdrlen; later calls reuse the flow cached in the cork.  The
 * actual queuing is delegated to __ip6_append_data().
 *
 * NOTE(review): several lines of this function are elided in this view;
 * comments describe only the code that is visible.
 */
1605 int ip6_append_data(struct sock *sk,
1606 int getfrag(void *from, char *to, int offset, int len,
1607 int odd, struct sk_buff *skb),
1608 void *from, int length, int transhdrlen,
1609 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1610 struct rt6_info *rt, unsigned int flags)
1612 struct inet_sock *inet = inet_sk(sk);
1613 struct ipv6_pinfo *np = inet6_sk(sk);
/* MSG_PROBE means "probe the path only" - nothing is queued */
1617 if (flags&MSG_PROBE)
/* First fragment of a corked sequence: set up cork state */
1619 if (skb_queue_empty(&sk->sk_write_queue)) {
1623 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
/* account extension-header (dest opts) space in both totals */
1628 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1629 length += exthdrlen;
1630 transhdrlen += exthdrlen;
/* subsequent appends must use the flow stored in the cork */
1632 fl6 = &inet->cork.fl.u.ip6;
1636 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1637 &np->cork, sk_page_frag(sk), getfrag,
1638 from, length, transhdrlen, flags, ipc6);
1640 EXPORT_SYMBOL_GPL(ip6_append_data);
/*
 * ip6_cork_release - free all state held by a cork.
 *
 * Releases each stored IPv6 tx option block (dst0opt, dst1opt, hopopt,
 * srcrt) and the option struct itself, drops the cached dst reference,
 * clears IPCORK_ALLFRAG, and wipes the cached flow so the cork can be
 * reused.  kfree(NULL) is a no-op, so unset options are harmless.
 */
1642 static void ip6_cork_release(struct inet_cork_full *cork,
1643 struct inet6_cork *v6_cork)
1646 kfree(v6_cork->opt->dst0opt);
1647 kfree(v6_cork->opt->dst1opt);
1648 kfree(v6_cork->opt->hopopt);
1649 kfree(v6_cork->opt->srcrt);
1650 kfree(v6_cork->opt);
1651 v6_cork->opt = NULL;
/* drop the route cached for this corked sequence */
1654 if (cork->base.dst) {
1655 dst_release(cork->base.dst);
1656 cork->base.dst = NULL;
1657 cork->base.flags &= ~IPCORK_ALLFRAG;
1659 memset(&cork->fl, 0, sizeof(cork->fl));
/*
 * __ip6_make_skb - collapse the queued fragments into one skb and
 * prepend the final IPv6 header.
 *
 * The first skb on @queue becomes the head; every further skb is pulled
 * past the extension headers and chained onto its frag_list, with
 * len/data_len/truesize accounted on the head.  Fragment and
 * non-fragment options from the cork are then pushed (which may rewrite
 * @proto and @final_dst, e.g. for routing headers), the fixed IPv6
 * header is built from the corked flow, per-packet fields (priority,
 * mark, tstamp, dst) are set, and MIB counters are bumped.  The cork is
 * released before returning.
 *
 * NOTE(review): some lines (error paths, return) are elided in this
 * view; comments describe only the visible code.
 */
1662 struct sk_buff *__ip6_make_skb(struct sock *sk,
1663 struct sk_buff_head *queue,
1664 struct inet_cork_full *cork,
1665 struct inet6_cork *v6_cork)
1667 struct sk_buff *skb, *tmp_skb;
1668 struct sk_buff **tail_skb;
1669 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1670 struct ipv6_pinfo *np = inet6_sk(sk);
1671 struct net *net = sock_net(sk);
1672 struct ipv6hdr *hdr;
1673 struct ipv6_txoptions *opt = v6_cork->opt;
1674 struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1675 struct flowi6 *fl6 = &cork->fl.u.ip6;
1676 unsigned char proto = fl6->flowi6_proto;
/* head skb; the rest of the queue is chained onto its frag_list */
1678 skb = __skb_dequeue(queue);
1681 tail_skb = &(skb_shinfo(skb)->frag_list);
1683 /* move skb->data to ip header from ext header */
1684 if (skb->data < skb_network_header(skb))
1685 __skb_pull(skb, skb_network_offset(skb));
1686 while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1687 __skb_pull(tmp_skb, skb_network_header_len(skb));
1688 *tail_skb = tmp_skb;
1689 tail_skb = &(tmp_skb->next);
/* account chained fragment on the head skb */
1690 skb->len += tmp_skb->len;
1691 skb->data_len += tmp_skb->len;
1692 skb->truesize += tmp_skb->truesize;
/* head skb's destructor now owns the write-space accounting */
1693 tmp_skb->destructor = NULL;
1697 /* Allow local fragmentation. */
1698 skb->ignore_df = ip6_sk_ignore_df(sk);
/* option push below may redirect final_dst (e.g. routing header) */
1700 *final_dst = fl6->daddr;
1701 __skb_pull(skb, skb_network_header_len(skb));
1702 if (opt && opt->opt_flen)
1703 ipv6_push_frag_opts(skb, opt, &proto);
1704 if (opt && opt->opt_nflen)
1705 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
/* build the fixed IPv6 header in front of the options */
1707 skb_push(skb, sizeof(struct ipv6hdr));
1708 skb_reset_network_header(skb);
1709 hdr = ipv6_hdr(skb);
1711 ip6_flow_hdr(hdr, v6_cork->tclass,
1712 ip6_make_flowlabel(net, skb, fl6->flowlabel,
1713 ip6_autoflowlabel(net, np), fl6));
1714 hdr->hop_limit = v6_cork->hop_limit;
1715 hdr->nexthdr = proto;
1716 hdr->saddr = fl6->saddr;
1717 hdr->daddr = *final_dst;
1719 skb->priority = sk->sk_priority;
1720 skb->mark = sk->sk_mark;
1722 skb->tstamp = cork->base.transmit_time;
1724 skb_dst_set(skb, dst_clone(&rt->dst));
1725 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1726 if (proto == IPPROTO_ICMPV6) {
1727 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1729 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1730 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
/* done with the cork state for this message */
1733 ip6_cork_release(cork, v6_cork);
/*
 * ip6_send_skb - hand a fully built skb to the IPv6 output path.
 *
 * Sends via ip6_local_out(); positive qdisc return codes are mapped to
 * errno values with net_xmit_errno(), and OUTDISCARDS is bumped on
 * failure (error-path lines are partially elided in this view).
 */
1738 int ip6_send_skb(struct sk_buff *skb)
1740 struct net *net = sock_net(skb->sk);
1741 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1744 err = ip6_local_out(net, skb->sk, skb);
/* NET_XMIT_* codes are positive; translate to -errno */
1747 err = net_xmit_errno(err);
1749 IP6_INC_STATS(net, rt->rt6i_idev,
1750 IPSTATS_MIB_OUTDISCARDS);
/*
 * ip6_push_pending_frames - finish and transmit the corked data.
 *
 * Builds the final skb from the socket's write queue via
 * ip6_finish_skb() and passes it to ip6_send_skb().  (The empty-queue
 * check between the two calls is elided in this view.)
 */
1756 int ip6_push_pending_frames(struct sock *sk)
1758 struct sk_buff *skb;
1760 skb = ip6_finish_skb(sk);
1764 return ip6_send_skb(skb);
1766 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
/*
 * __ip6_flush_pending_frames - discard everything queued on @queue.
 *
 * Dequeues each pending skb from the tail, counting each discard in
 * IPSTATS_MIB_OUTDISCARDS (the kfree_skb call is elided in this view),
 * then releases the cork state.
 */
1768 static void __ip6_flush_pending_frames(struct sock *sk,
1769 struct sk_buff_head *queue,
1770 struct inet_cork_full *cork,
1771 struct inet6_cork *v6_cork)
1773 struct sk_buff *skb;
1775 while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1777 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1778 IPSTATS_MIB_OUTDISCARDS);
1782 ip6_cork_release(cork, v6_cork);
/*
 * ip6_flush_pending_frames - public wrapper: drop all data corked on
 * @sk's write queue and release its cork state.
 */
1785 void ip6_flush_pending_frames(struct sock *sk)
1787 __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1788 &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1790 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
/*
 * ip6_make_skb - build a single skb from @length bytes of user data
 * using a caller-supplied (typically on-stack) cork and private queue,
 * without touching the socket's write queue.
 *
 * Sets up the cork, appends the data via __ip6_append_data() using the
 * current task's page frag, and collapses the queue with
 * __ip6_make_skb().  On error the queue is flushed and an ERR_PTR is
 * returned; MSG_PROBE requests build nothing.
 *
 * Fix: "¤t->task_frag" was mojibake for "&current->task_frag"
 * (the "&curren" prefix had been rendered as the HTML entity for the
 * currency sign); restored the correct argument.
 *
 * NOTE(review): several lines of this function are elided in this view;
 * all visible lines other than the mojibake fix are unchanged.
 */
1792 struct sk_buff *ip6_make_skb(struct sock *sk,
1793 int getfrag(void *from, char *to, int offset,
1794 int len, int odd, struct sk_buff *skb),
1795 void *from, int length, int transhdrlen,
1796 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1797 struct rt6_info *rt, unsigned int flags,
1798 struct inet_cork_full *cork)
1800 struct inet6_cork v6_cork;
1801 struct sk_buff_head queue;
1802 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
/* path-MTU probe: nothing to build */
1805 if (flags & MSG_PROBE)
1808 __skb_queue_head_init(&queue);
/* caller-provided cork starts out empty */
1810 cork->base.flags = 0;
1811 cork->base.addr = 0;
1812 cork->base.opt = NULL;
1813 cork->base.dst = NULL;
1815 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1817 ip6_cork_release(cork, &v6_cork);
1818 return ERR_PTR(err);
/* negative dontfrag means "use the socket default" */
1820 if (ipc6->dontfrag < 0)
1821 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1823 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1824 &current->task_frag, getfrag, from,
1825 length + exthdrlen, transhdrlen + exthdrlen,
1828 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1829 return ERR_PTR(err);
1832 return __ip6_make_skb(sk, &queue, cork, &v6_cork);