GNU Linux-libre 5.13.14-gnu1
[releases.git] / net / ipv6 / ip6_output.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      IPv6 output functions
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on linux/net/ipv4/ip_output.c
10  *
11  *      Changes:
12  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
13  *                              extension headers are implemented.
14  *                              route changes now work.
15  *                              ip6_forward does not confuse sniffers.
16  *                              etc.
17  *
18  *      H. von Brand    :       Added missing #include <linux/string.h>
19  *      Imran Patel     :       frag id should be in NBO
20  *      Kazunori MIYAZAWA @USAGI
21  *                      :       add ip6_append_data and related functions
22  *                              for datagram xmit
23  */
24
25 #include <linux/errno.h>
26 #include <linux/kernel.h>
27 #include <linux/string.h>
28 #include <linux/socket.h>
29 #include <linux/net.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/in6.h>
33 #include <linux/tcp.h>
34 #include <linux/route.h>
35 #include <linux/module.h>
36 #include <linux/slab.h>
37
38 #include <linux/bpf-cgroup.h>
39 #include <linux/netfilter.h>
40 #include <linux/netfilter_ipv6.h>
41
42 #include <net/sock.h>
43 #include <net/snmp.h>
44
45 #include <net/ipv6.h>
46 #include <net/ndisc.h>
47 #include <net/protocol.h>
48 #include <net/ip6_route.h>
49 #include <net/addrconf.h>
50 #include <net/rawv6.h>
51 #include <net/icmp.h>
52 #include <net/xfrm.h>
53 #include <net/checksum.h>
54 #include <linux/mroute6.h>
55 #include <net/l3mdev.h>
56 #include <net/lwtunnel.h>
57 #include <net/ip_tunnels.h>
58
59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
60 {
61         struct dst_entry *dst = skb_dst(skb);
62         struct net_device *dev = dst->dev;
63         unsigned int hh_len = LL_RESERVED_SPACE(dev);
64         int delta = hh_len - skb_headroom(skb);
65         const struct in6_addr *nexthop;
66         struct neighbour *neigh;
67         int ret;
68
69         /* Be paranoid, rather than too clever. */
70         if (unlikely(delta > 0) && dev->header_ops) {
71                 /* pskb_expand_head() might crash, if skb is shared */
72                 if (skb_shared(skb)) {
73                         struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
74
75                         if (likely(nskb)) {
76                                 if (skb->sk)
77                                         skb_set_owner_w(nskb, skb->sk);
78                                 consume_skb(skb);
79                         } else {
80                                 kfree_skb(skb);
81                         }
82                         skb = nskb;
83                 }
84                 if (skb &&
85                     pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86                         kfree_skb(skb);
87                         skb = NULL;
88                 }
89                 if (!skb) {
90                         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
91                         return -ENOMEM;
92                 }
93         }
94
95         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
96                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
97
98                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
99                     ((mroute6_is_socket(net, skb) &&
100                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
101                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
102                                          &ipv6_hdr(skb)->saddr))) {
103                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
104
105                         /* Do not check for IFF_ALLMULTI; multicast routing
106                            is not supported in any case.
107                          */
108                         if (newskb)
109                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
110                                         net, sk, newskb, NULL, newskb->dev,
111                                         dev_loopback_xmit);
112
113                         if (ipv6_hdr(skb)->hop_limit == 0) {
114                                 IP6_INC_STATS(net, idev,
115                                               IPSTATS_MIB_OUTDISCARDS);
116                                 kfree_skb(skb);
117                                 return 0;
118                         }
119                 }
120
121                 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
122
123                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
124                     IPV6_ADDR_SCOPE_NODELOCAL &&
125                     !(dev->flags & IFF_LOOPBACK)) {
126                         kfree_skb(skb);
127                         return 0;
128                 }
129         }
130
131         if (lwtunnel_xmit_redirect(dst->lwtstate)) {
132                 int res = lwtunnel_xmit(skb);
133
134                 if (res < 0 || res == LWTUNNEL_XMIT_DONE)
135                         return res;
136         }
137
138         rcu_read_lock_bh();
139         nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
140         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
141         if (unlikely(!neigh))
142                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
143         if (!IS_ERR(neigh)) {
144                 sock_confirm_neigh(skb, neigh);
145                 ret = neigh_output(neigh, skb, false);
146                 rcu_read_unlock_bh();
147                 return ret;
148         }
149         rcu_read_unlock_bh();
150
151         IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
152         kfree_skb(skb);
153         return -EINVAL;
154 }
155
156 static int
157 ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
158                                     struct sk_buff *skb, unsigned int mtu)
159 {
160         struct sk_buff *segs, *nskb;
161         netdev_features_t features;
162         int ret = 0;
163
164         /* Please see corresponding comment in ip_finish_output_gso
165          * describing the cases where GSO segment length exceeds the
166          * egress MTU.
167          */
168         features = netif_skb_features(skb);
169         segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
170         if (IS_ERR_OR_NULL(segs)) {
171                 kfree_skb(skb);
172                 return -ENOMEM;
173         }
174
175         consume_skb(skb);
176
177         skb_list_walk_safe(segs, segs, nskb) {
178                 int err;
179
180                 skb_mark_not_on_list(segs);
181                 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
182                 if (err && ret == 0)
183                         ret = err;
184         }
185
186         return ret;
187 }
188
189 static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
190 {
191         unsigned int mtu;
192
193 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
194         /* Policy lookup after SNAT yielded a new policy */
195         if (skb_dst(skb)->xfrm) {
196                 IPCB(skb)->flags |= IPSKB_REROUTED;
197                 return dst_output(net, sk, skb);
198         }
199 #endif
200
201         mtu = ip6_skb_dst_mtu(skb);
202         if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
203                 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
204
205         if ((skb->len > mtu && !skb_is_gso(skb)) ||
206             dst_allfrag(skb_dst(skb)) ||
207             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
208                 return ip6_fragment(net, sk, skb, ip6_finish_output2);
209         else
210                 return ip6_finish_output2(net, sk, skb);
211 }
212
213 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
214 {
215         int ret;
216
217         ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
218         switch (ret) {
219         case NET_XMIT_SUCCESS:
220                 return __ip6_finish_output(net, sk, skb);
221         case NET_XMIT_CN:
222                 return __ip6_finish_output(net, sk, skb) ? : ret;
223         default:
224                 kfree_skb(skb);
225                 return ret;
226         }
227 }
228
229 int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
230 {
231         struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
232         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
233
234         skb->protocol = htons(ETH_P_IPV6);
235         skb->dev = dev;
236
237         if (unlikely(idev->cnf.disable_ipv6)) {
238                 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
239                 kfree_skb(skb);
240                 return 0;
241         }
242
243         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
244                             net, sk, skb, indev, dev,
245                             ip6_finish_output,
246                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
247 }
248 EXPORT_SYMBOL(ip6_output);
249
250 bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
251 {
252         if (!np->autoflowlabel_set)
253                 return ip6_default_np_autolabel(net);
254         else
255                 return np->autoflowlabel;
256 }
257
258 /*
259  * xmit an sk_buff (used by TCP, SCTP and DCCP)
260  * Note : socket lock is not held for SYNACK packets, but might be modified
261  * by calls to skb_set_owner_w() and ipv6_local_error(),
262  * which are using proper atomic operations or spinlocks.
263  */
264 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
265              __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
266 {
267         struct net *net = sock_net(sk);
268         const struct ipv6_pinfo *np = inet6_sk(sk);
269         struct in6_addr *first_hop = &fl6->daddr;
270         struct dst_entry *dst = skb_dst(skb);
271         unsigned int head_room;
272         struct ipv6hdr *hdr;
273         u8  proto = fl6->flowi6_proto;
274         int seg_len = skb->len;
275         int hlimit = -1;
276         u32 mtu;
277
278         head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
279         if (opt)
280                 head_room += opt->opt_nflen + opt->opt_flen;
281
282         if (unlikely(skb_headroom(skb) < head_room)) {
283                 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
284                 if (!skb2) {
285                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
286                                       IPSTATS_MIB_OUTDISCARDS);
287                         kfree_skb(skb);
288                         return -ENOBUFS;
289                 }
290                 if (skb->sk)
291                         skb_set_owner_w(skb2, skb->sk);
292                 consume_skb(skb);
293                 skb = skb2;
294         }
295
296         if (opt) {
297                 seg_len += opt->opt_nflen + opt->opt_flen;
298
299                 if (opt->opt_flen)
300                         ipv6_push_frag_opts(skb, opt, &proto);
301
302                 if (opt->opt_nflen)
303                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
304                                              &fl6->saddr);
305         }
306
307         skb_push(skb, sizeof(struct ipv6hdr));
308         skb_reset_network_header(skb);
309         hdr = ipv6_hdr(skb);
310
311         /*
312          *      Fill in the IPv6 header
313          */
314         if (np)
315                 hlimit = np->hop_limit;
316         if (hlimit < 0)
317                 hlimit = ip6_dst_hoplimit(dst);
318
319         ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
320                                 ip6_autoflowlabel(net, np), fl6));
321
322         hdr->payload_len = htons(seg_len);
323         hdr->nexthdr = proto;
324         hdr->hop_limit = hlimit;
325
326         hdr->saddr = fl6->saddr;
327         hdr->daddr = *first_hop;
328
329         skb->protocol = htons(ETH_P_IPV6);
330         skb->priority = priority;
331         skb->mark = mark;
332
333         mtu = dst_mtu(dst);
334         if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
335                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
336                               IPSTATS_MIB_OUT, skb->len);
337
338                 /* if egress device is enslaved to an L3 master device pass the
339                  * skb to its handler for processing
340                  */
341                 skb = l3mdev_ip6_out((struct sock *)sk, skb);
342                 if (unlikely(!skb))
343                         return 0;
344
345                 /* hooks should never assume socket lock is held.
346                  * we promote our socket to non const
347                  */
348                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
349                                net, (struct sock *)sk, skb, NULL, dst->dev,
350                                dst_output);
351         }
352
353         skb->dev = dst->dev;
354         /* ipv6_local_error() does not require socket lock,
355          * we promote our socket to non const
356          */
357         ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
358
359         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
360         kfree_skb(skb);
361         return -EMSGSIZE;
362 }
363 EXPORT_SYMBOL(ip6_xmit);
364
365 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
366 {
367         struct ip6_ra_chain *ra;
368         struct sock *last = NULL;
369
370         read_lock(&ip6_ra_lock);
371         for (ra = ip6_ra_chain; ra; ra = ra->next) {
372                 struct sock *sk = ra->sk;
373                 if (sk && ra->sel == sel &&
374                     (!sk->sk_bound_dev_if ||
375                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
376                         struct ipv6_pinfo *np = inet6_sk(sk);
377
378                         if (np && np->rtalert_isolate &&
379                             !net_eq(sock_net(sk), dev_net(skb->dev))) {
380                                 continue;
381                         }
382                         if (last) {
383                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
384                                 if (skb2)
385                                         rawv6_rcv(last, skb2);
386                         }
387                         last = sk;
388                 }
389         }
390
391         if (last) {
392                 rawv6_rcv(last, skb);
393                 read_unlock(&ip6_ra_lock);
394                 return 1;
395         }
396         read_unlock(&ip6_ra_lock);
397         return 0;
398 }
399
400 static int ip6_forward_proxy_check(struct sk_buff *skb)
401 {
402         struct ipv6hdr *hdr = ipv6_hdr(skb);
403         u8 nexthdr = hdr->nexthdr;
404         __be16 frag_off;
405         int offset;
406
407         if (ipv6_ext_hdr(nexthdr)) {
408                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
409                 if (offset < 0)
410                         return 0;
411         } else
412                 offset = sizeof(struct ipv6hdr);
413
414         if (nexthdr == IPPROTO_ICMPV6) {
415                 struct icmp6hdr *icmp6;
416
417                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
418                                          offset + 1 - skb->data)))
419                         return 0;
420
421                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
422
423                 switch (icmp6->icmp6_type) {
424                 case NDISC_ROUTER_SOLICITATION:
425                 case NDISC_ROUTER_ADVERTISEMENT:
426                 case NDISC_NEIGHBOUR_SOLICITATION:
427                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
428                 case NDISC_REDIRECT:
429                         /* For reaction involving unicast neighbor discovery
430                          * message destined to the proxied address, pass it to
431                          * input function.
432                          */
433                         return 1;
434                 default:
435                         break;
436                 }
437         }
438
439         /*
440          * The proxying router can't forward traffic sent to a link-local
441          * address, so signal the sender and discard the packet. This
442          * behavior is clarified by the MIPv6 specification.
443          */
444         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
445                 dst_link_failure(skb);
446                 return -1;
447         }
448
449         return 0;
450 }
451
452 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
453                                      struct sk_buff *skb)
454 {
455         struct dst_entry *dst = skb_dst(skb);
456
457         __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
458         __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
459
460 #ifdef CONFIG_NET_SWITCHDEV
461         if (skb->offload_l3_fwd_mark) {
462                 consume_skb(skb);
463                 return 0;
464         }
465 #endif
466
467         skb->tstamp = 0;
468         return dst_output(net, sk, skb);
469 }
470
471 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
472 {
473         if (skb->len <= mtu)
474                 return false;
475
476         /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
477         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
478                 return true;
479
480         if (skb->ignore_df)
481                 return false;
482
483         if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
484                 return false;
485
486         return true;
487 }
488
489 int ip6_forward(struct sk_buff *skb)
490 {
491         struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
492         struct dst_entry *dst = skb_dst(skb);
493         struct ipv6hdr *hdr = ipv6_hdr(skb);
494         struct inet6_skb_parm *opt = IP6CB(skb);
495         struct net *net = dev_net(dst->dev);
496         u32 mtu;
497
498         if (net->ipv6.devconf_all->forwarding == 0)
499                 goto error;
500
501         if (skb->pkt_type != PACKET_HOST)
502                 goto drop;
503
504         if (unlikely(skb->sk))
505                 goto drop;
506
507         if (skb_warn_if_lro(skb))
508                 goto drop;
509
510         if (!net->ipv6.devconf_all->disable_policy &&
511             !idev->cnf.disable_policy &&
512             !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
513                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
514                 goto drop;
515         }
516
517         skb_forward_csum(skb);
518
519         /*
520          *      We DO NOT make any processing on
521          *      RA packets, pushing them to user level AS IS
522          *      without ane WARRANTY that application will be able
523          *      to interpret them. The reason is that we
524          *      cannot make anything clever here.
525          *
526          *      We are not end-node, so that if packet contains
527          *      AH/ESP, we cannot make anything.
528          *      Defragmentation also would be mistake, RA packets
529          *      cannot be fragmented, because there is no warranty
530          *      that different fragments will go along one path. --ANK
531          */
532         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
533                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
534                         return 0;
535         }
536
537         /*
538          *      check and decrement ttl
539          */
540         if (hdr->hop_limit <= 1) {
541                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
542                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
543
544                 kfree_skb(skb);
545                 return -ETIMEDOUT;
546         }
547
548         /* XXX: idev->cnf.proxy_ndp? */
549         if (net->ipv6.devconf_all->proxy_ndp &&
550             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
551                 int proxied = ip6_forward_proxy_check(skb);
552                 if (proxied > 0)
553                         return ip6_input(skb);
554                 else if (proxied < 0) {
555                         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
556                         goto drop;
557                 }
558         }
559
560         if (!xfrm6_route_forward(skb)) {
561                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
562                 goto drop;
563         }
564         dst = skb_dst(skb);
565
566         /* IPv6 specs say nothing about it, but it is clear that we cannot
567            send redirects to source routed frames.
568            We don't send redirects to frames decapsulated from IPsec.
569          */
570         if (IP6CB(skb)->iif == dst->dev->ifindex &&
571             opt->srcrt == 0 && !skb_sec_path(skb)) {
572                 struct in6_addr *target = NULL;
573                 struct inet_peer *peer;
574                 struct rt6_info *rt;
575
576                 /*
577                  *      incoming and outgoing devices are the same
578                  *      send a redirect.
579                  */
580
581                 rt = (struct rt6_info *) dst;
582                 if (rt->rt6i_flags & RTF_GATEWAY)
583                         target = &rt->rt6i_gateway;
584                 else
585                         target = &hdr->daddr;
586
587                 peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
588
589                 /* Limit redirects both by destination (here)
590                    and by source (inside ndisc_send_redirect)
591                  */
592                 if (inet_peer_xrlim_allow(peer, 1*HZ))
593                         ndisc_send_redirect(skb, target);
594                 if (peer)
595                         inet_putpeer(peer);
596         } else {
597                 int addrtype = ipv6_addr_type(&hdr->saddr);
598
599                 /* This check is security critical. */
600                 if (addrtype == IPV6_ADDR_ANY ||
601                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
602                         goto error;
603                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
604                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
605                                     ICMPV6_NOT_NEIGHBOUR, 0);
606                         goto error;
607                 }
608         }
609
610         mtu = ip6_dst_mtu_forward(dst);
611         if (mtu < IPV6_MIN_MTU)
612                 mtu = IPV6_MIN_MTU;
613
614         if (ip6_pkt_too_big(skb, mtu)) {
615                 /* Again, force OUTPUT device used as source address */
616                 skb->dev = dst->dev;
617                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
618                 __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
619                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
620                                 IPSTATS_MIB_FRAGFAILS);
621                 kfree_skb(skb);
622                 return -EMSGSIZE;
623         }
624
625         if (skb_cow(skb, dst->dev->hard_header_len)) {
626                 __IP6_INC_STATS(net, ip6_dst_idev(dst),
627                                 IPSTATS_MIB_OUTDISCARDS);
628                 goto drop;
629         }
630
631         hdr = ipv6_hdr(skb);
632
633         /* Mangling hops number delayed to point after skb COW */
634
635         hdr->hop_limit--;
636
637         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
638                        net, NULL, skb, skb->dev, dst->dev,
639                        ip6_forward_finish);
640
641 error:
642         __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
643 drop:
644         kfree_skb(skb);
645         return -EINVAL;
646 }
647
648 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
649 {
650         to->pkt_type = from->pkt_type;
651         to->priority = from->priority;
652         to->protocol = from->protocol;
653         skb_dst_drop(to);
654         skb_dst_set(to, dst_clone(skb_dst(from)));
655         to->dev = from->dev;
656         to->mark = from->mark;
657
658         skb_copy_hash(to, from);
659
660 #ifdef CONFIG_NET_SCHED
661         to->tc_index = from->tc_index;
662 #endif
663         nf_copy(to, from);
664         skb_ext_copy(to, from);
665         skb_copy_secmark(to, from);
666 }
667
668 int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
669                       u8 nexthdr, __be32 frag_id,
670                       struct ip6_fraglist_iter *iter)
671 {
672         unsigned int first_len;
673         struct frag_hdr *fh;
674
675         /* BUILD HEADER */
676         *prevhdr = NEXTHDR_FRAGMENT;
677         iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
678         if (!iter->tmp_hdr)
679                 return -ENOMEM;
680
681         iter->frag = skb_shinfo(skb)->frag_list;
682         skb_frag_list_init(skb);
683
684         iter->offset = 0;
685         iter->hlen = hlen;
686         iter->frag_id = frag_id;
687         iter->nexthdr = nexthdr;
688
689         __skb_pull(skb, hlen);
690         fh = __skb_push(skb, sizeof(struct frag_hdr));
691         __skb_push(skb, hlen);
692         skb_reset_network_header(skb);
693         memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
694
695         fh->nexthdr = nexthdr;
696         fh->reserved = 0;
697         fh->frag_off = htons(IP6_MF);
698         fh->identification = frag_id;
699
700         first_len = skb_pagelen(skb);
701         skb->data_len = first_len - skb_headlen(skb);
702         skb->len = first_len;
703         ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
704
705         return 0;
706 }
707 EXPORT_SYMBOL(ip6_fraglist_init);
708
709 void ip6_fraglist_prepare(struct sk_buff *skb,
710                           struct ip6_fraglist_iter *iter)
711 {
712         struct sk_buff *frag = iter->frag;
713         unsigned int hlen = iter->hlen;
714         struct frag_hdr *fh;
715
716         frag->ip_summed = CHECKSUM_NONE;
717         skb_reset_transport_header(frag);
718         fh = __skb_push(frag, sizeof(struct frag_hdr));
719         __skb_push(frag, hlen);
720         skb_reset_network_header(frag);
721         memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
722         iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
723         fh->nexthdr = iter->nexthdr;
724         fh->reserved = 0;
725         fh->frag_off = htons(iter->offset);
726         if (frag->next)
727                 fh->frag_off |= htons(IP6_MF);
728         fh->identification = iter->frag_id;
729         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
730         ip6_copy_metadata(frag, skb);
731 }
732 EXPORT_SYMBOL(ip6_fraglist_prepare);
733
734 void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
735                    unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
736                    u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
737 {
738         state->prevhdr = prevhdr;
739         state->nexthdr = nexthdr;
740         state->frag_id = frag_id;
741
742         state->hlen = hlen;
743         state->mtu = mtu;
744
745         state->left = skb->len - hlen;  /* Space per frame */
746         state->ptr = hlen;              /* Where to start from */
747
748         state->hroom = hdr_room;
749         state->troom = needed_tailroom;
750
751         state->offset = 0;
752 }
753 EXPORT_SYMBOL(ip6_frag_init);
754
755 struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
756 {
757         u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
758         struct sk_buff *frag;
759         struct frag_hdr *fh;
760         unsigned int len;
761
762         len = state->left;
763         /* IF: it doesn't fit, use 'mtu' - the data space left */
764         if (len > state->mtu)
765                 len = state->mtu;
766         /* IF: we are not sending up to and including the packet end
767            then align the next start on an eight byte boundary */
768         if (len < state->left)
769                 len &= ~7;
770
771         /* Allocate buffer */
772         frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
773                          state->hroom + state->troom, GFP_ATOMIC);
774         if (!frag)
775                 return ERR_PTR(-ENOMEM);
776
777         /*
778          *      Set up data on packet
779          */
780
781         ip6_copy_metadata(frag, skb);
782         skb_reserve(frag, state->hroom);
783         skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
784         skb_reset_network_header(frag);
785         fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
786         frag->transport_header = (frag->network_header + state->hlen +
787                                   sizeof(struct frag_hdr));
788
789         /*
790          *      Charge the memory for the fragment to any owner
791          *      it might possess
792          */
793         if (skb->sk)
794                 skb_set_owner_w(frag, skb->sk);
795
796         /*
797          *      Copy the packet header into the new buffer.
798          */
799         skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
800
801         fragnexthdr_offset = skb_network_header(frag);
802         fragnexthdr_offset += prevhdr - skb_network_header(skb);
803         *fragnexthdr_offset = NEXTHDR_FRAGMENT;
804
805         /*
806          *      Build fragment header.
807          */
808         fh->nexthdr = state->nexthdr;
809         fh->reserved = 0;
810         fh->identification = state->frag_id;
811
812         /*
813          *      Copy a block of the IP datagram.
814          */
815         BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
816                              len));
817         state->left -= len;
818
819         fh->frag_off = htons(state->offset);
820         if (state->left > 0)
821                 fh->frag_off |= htons(IP6_MF);
822         ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
823
824         state->ptr += len;
825         state->offset += len;
826
827         return frag;
828 }
829 EXPORT_SYMBOL(ip6_frag_next);
830
831 int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
832                  int (*output)(struct net *, struct sock *, struct sk_buff *))
833 {
834         struct sk_buff *frag;
835         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
836         struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
837                                 inet6_sk(skb->sk) : NULL;
838         struct ip6_frag_state state;
839         unsigned int mtu, hlen, nexthdr_offset;
840         ktime_t tstamp = skb->tstamp;
841         int hroom, err = 0;
842         __be32 frag_id;
843         u8 *prevhdr, nexthdr = 0;
844
845         err = ip6_find_1stfragopt(skb, &prevhdr);
846         if (err < 0)
847                 goto fail;
848         hlen = err;
849         nexthdr = *prevhdr;
850         nexthdr_offset = prevhdr - skb_network_header(skb);
851
852         mtu = ip6_skb_dst_mtu(skb);
853
854         /* We must not fragment if the socket is set to force MTU discovery
855          * or if the skb it not generated by a local socket.
856          */
857         if (unlikely(!skb->ignore_df && skb->len > mtu))
858                 goto fail_toobig;
859
860         if (IP6CB(skb)->frag_max_size) {
861                 if (IP6CB(skb)->frag_max_size > mtu)
862                         goto fail_toobig;
863
864                 /* don't send fragments larger than what we received */
865                 mtu = IP6CB(skb)->frag_max_size;
866                 if (mtu < IPV6_MIN_MTU)
867                         mtu = IPV6_MIN_MTU;
868         }
869
870         if (np && np->frag_size < mtu) {
871                 if (np->frag_size)
872                         mtu = np->frag_size;
873         }
874         if (mtu < hlen + sizeof(struct frag_hdr) + 8)
875                 goto fail_toobig;
876         mtu -= hlen + sizeof(struct frag_hdr);
877
878         frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
879                                     &ipv6_hdr(skb)->saddr);
880
881         if (skb->ip_summed == CHECKSUM_PARTIAL &&
882             (err = skb_checksum_help(skb)))
883                 goto fail;
884
885         prevhdr = skb_network_header(skb) + nexthdr_offset;
886         hroom = LL_RESERVED_SPACE(rt->dst.dev);
887         if (skb_has_frag_list(skb)) {
888                 unsigned int first_len = skb_pagelen(skb);
889                 struct ip6_fraglist_iter iter;
890                 struct sk_buff *frag2;
891
892                 if (first_len - hlen > mtu ||
893                     ((first_len - hlen) & 7) ||
894                     skb_cloned(skb) ||
895                     skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
896                         goto slow_path;
897
898                 skb_walk_frags(skb, frag) {
899                         /* Correct geometry. */
900                         if (frag->len > mtu ||
901                             ((frag->len & 7) && frag->next) ||
902                             skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
903                                 goto slow_path_clean;
904
905                         /* Partially cloned skb? */
906                         if (skb_shared(frag))
907                                 goto slow_path_clean;
908
909                         BUG_ON(frag->sk);
910                         if (skb->sk) {
911                                 frag->sk = skb->sk;
912                                 frag->destructor = sock_wfree;
913                         }
914                         skb->truesize -= frag->truesize;
915                 }
916
917                 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
918                                         &iter);
919                 if (err < 0)
920                         goto fail;
921
922                 for (;;) {
923                         /* Prepare header of the next frame,
924                          * before previous one went down. */
925                         if (iter.frag)
926                                 ip6_fraglist_prepare(skb, &iter);
927
928                         skb->tstamp = tstamp;
929                         err = output(net, sk, skb);
930                         if (!err)
931                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
932                                               IPSTATS_MIB_FRAGCREATES);
933
934                         if (err || !iter.frag)
935                                 break;
936
937                         skb = ip6_fraglist_next(&iter);
938                 }
939
940                 kfree(iter.tmp_hdr);
941
942                 if (err == 0) {
943                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
944                                       IPSTATS_MIB_FRAGOKS);
945                         return 0;
946                 }
947
948                 kfree_skb_list(iter.frag);
949
950                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
951                               IPSTATS_MIB_FRAGFAILS);
952                 return err;
953
954 slow_path_clean:
955                 skb_walk_frags(skb, frag2) {
956                         if (frag2 == frag)
957                                 break;
958                         frag2->sk = NULL;
959                         frag2->destructor = NULL;
960                         skb->truesize += frag2->truesize;
961                 }
962         }
963
964 slow_path:
965         /*
966          *      Fragment the datagram.
967          */
968
969         ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
970                       LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
971                       &state);
972
973         /*
974          *      Keep copying data until we run out.
975          */
976
977         while (state.left > 0) {
978                 frag = ip6_frag_next(skb, &state);
979                 if (IS_ERR(frag)) {
980                         err = PTR_ERR(frag);
981                         goto fail;
982                 }
983
984                 /*
985                  *      Put this fragment into the sending queue.
986                  */
987                 frag->tstamp = tstamp;
988                 err = output(net, sk, frag);
989                 if (err)
990                         goto fail;
991
992                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
993                               IPSTATS_MIB_FRAGCREATES);
994         }
995         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
996                       IPSTATS_MIB_FRAGOKS);
997         consume_skb(skb);
998         return err;
999
1000 fail_toobig:
1001         if (skb->sk && dst_allfrag(skb_dst(skb)))
1002                 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
1003
1004         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1005         err = -EMSGSIZE;
1006
1007 fail:
1008         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
1009                       IPSTATS_MIB_FRAGFAILS);
1010         kfree_skb(skb);
1011         return err;
1012 }
1013
1014 static inline int ip6_rt_check(const struct rt6key *rt_key,
1015                                const struct in6_addr *fl_addr,
1016                                const struct in6_addr *addr_cache)
1017 {
1018         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
1019                 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
1020 }
1021
1022 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
1023                                           struct dst_entry *dst,
1024                                           const struct flowi6 *fl6)
1025 {
1026         struct ipv6_pinfo *np = inet6_sk(sk);
1027         struct rt6_info *rt;
1028
1029         if (!dst)
1030                 goto out;
1031
1032         if (dst->ops->family != AF_INET6) {
1033                 dst_release(dst);
1034                 return NULL;
1035         }
1036
1037         rt = (struct rt6_info *)dst;
1038         /* Yes, checking route validity in not connected
1039          * case is not very simple. Take into account,
1040          * that we do not support routing by source, TOS,
1041          * and MSG_DONTROUTE            --ANK (980726)
1042          *
1043          * 1. ip6_rt_check(): If route was host route,
1044          *    check that cached destination is current.
1045          *    If it is network route, we still may
1046          *    check its validity using saved pointer
1047          *    to the last used address: daddr_cache.
1048          *    We do not want to save whole address now,
1049          *    (because main consumer of this service
1050          *    is tcp, which has not this problem),
1051          *    so that the last trick works only on connected
1052          *    sockets.
1053          * 2. oif also should be the same.
1054          */
1055         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
1056 #ifdef CONFIG_IPV6_SUBTREES
1057             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
1058 #endif
1059            (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
1060               (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
1061                 dst_release(dst);
1062                 dst = NULL;
1063         }
1064
1065 out:
1066         return dst;
1067 }
1068
1069 static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
1070                                struct dst_entry **dst, struct flowi6 *fl6)
1071 {
1072 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1073         struct neighbour *n;
1074         struct rt6_info *rt;
1075 #endif
1076         int err;
1077         int flags = 0;
1078
1079         /* The correct way to handle this would be to do
1080          * ip6_route_get_saddr, and then ip6_route_output; however,
1081          * the route-specific preferred source forces the
1082          * ip6_route_output call _before_ ip6_route_get_saddr.
1083          *
1084          * In source specific routing (no src=any default route),
1085          * ip6_route_output will fail given src=any saddr, though, so
1086          * that's why we try it again later.
1087          */
1088         if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
1089                 struct fib6_info *from;
1090                 struct rt6_info *rt;
1091                 bool had_dst = *dst != NULL;
1092
1093                 if (!had_dst)
1094                         *dst = ip6_route_output(net, sk, fl6);
1095                 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
1096
1097                 rcu_read_lock();
1098                 from = rt ? rcu_dereference(rt->from) : NULL;
1099                 err = ip6_route_get_saddr(net, from, &fl6->daddr,
1100                                           sk ? inet6_sk(sk)->srcprefs : 0,
1101                                           &fl6->saddr);
1102                 rcu_read_unlock();
1103
1104                 if (err)
1105                         goto out_err_release;
1106
1107                 /* If we had an erroneous initial result, pretend it
1108                  * never existed and let the SA-enabled version take
1109                  * over.
1110                  */
1111                 if (!had_dst && (*dst)->error) {
1112                         dst_release(*dst);
1113                         *dst = NULL;
1114                 }
1115
1116                 if (fl6->flowi6_oif)
1117                         flags |= RT6_LOOKUP_F_IFACE;
1118         }
1119
1120         if (!*dst)
1121                 *dst = ip6_route_output_flags(net, sk, fl6, flags);
1122
1123         err = (*dst)->error;
1124         if (err)
1125                 goto out_err_release;
1126
1127 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
1128         /*
1129          * Here if the dst entry we've looked up
1130          * has a neighbour entry that is in the INCOMPLETE
1131          * state and the src address from the flow is
1132          * marked as OPTIMISTIC, we release the found
1133          * dst entry and replace it instead with the
1134          * dst entry of the nexthop router
1135          */
1136         rt = (struct rt6_info *) *dst;
1137         rcu_read_lock_bh();
1138         n = __ipv6_neigh_lookup_noref(rt->dst.dev,
1139                                       rt6_nexthop(rt, &fl6->daddr));
1140         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
1141         rcu_read_unlock_bh();
1142
1143         if (err) {
1144                 struct inet6_ifaddr *ifp;
1145                 struct flowi6 fl_gw6;
1146                 int redirect;
1147
1148                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
1149                                       (*dst)->dev, 1);
1150
1151                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
1152                 if (ifp)
1153                         in6_ifa_put(ifp);
1154
1155                 if (redirect) {
1156                         /*
1157                          * We need to get the dst entry for the
1158                          * default router instead
1159                          */
1160                         dst_release(*dst);
1161                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1162                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1163                         *dst = ip6_route_output(net, sk, &fl_gw6);
1164                         err = (*dst)->error;
1165                         if (err)
1166                                 goto out_err_release;
1167                 }
1168         }
1169 #endif
1170         if (ipv6_addr_v4mapped(&fl6->saddr) &&
1171             !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
1172                 err = -EAFNOSUPPORT;
1173                 goto out_err_release;
1174         }
1175
1176         return 0;
1177
1178 out_err_release:
1179         dst_release(*dst);
1180         *dst = NULL;
1181
1182         if (err == -ENETUNREACH)
1183                 IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1184         return err;
1185 }
1186
1187 /**
1188  *      ip6_dst_lookup - perform route lookup on flow
1189  *      @net: Network namespace to perform lookup in
1190  *      @sk: socket which provides route info
1191  *      @dst: pointer to dst_entry * for result
1192  *      @fl6: flow to lookup
1193  *
1194  *      This function performs a route lookup on the given flow.
1195  *
1196  *      It returns zero on success, or a standard errno code on error.
1197  */
1198 int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
1199                    struct flowi6 *fl6)
1200 {
1201         *dst = NULL;
1202         return ip6_dst_lookup_tail(net, sk, dst, fl6);
1203 }
1204 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1205
1206 /**
1207  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1208  *      @net: Network namespace to perform lookup in
1209  *      @sk: socket which provides route info
1210  *      @fl6: flow to lookup
1211  *      @final_dst: final destination address for ipsec lookup
1212  *
1213  *      This function performs a route lookup on the given flow.
1214  *
1215  *      It returns a valid dst pointer on success, or a pointer encoded
1216  *      error code.
1217  */
1218 struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1219                                       const struct in6_addr *final_dst)
1220 {
1221         struct dst_entry *dst = NULL;
1222         int err;
1223
1224         err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1225         if (err)
1226                 return ERR_PTR(err);
1227         if (final_dst)
1228                 fl6->daddr = *final_dst;
1229
1230         return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1231 }
1232 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1233
1234 /**
1235  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1236  *      @sk: socket which provides the dst cache and route info
1237  *      @fl6: flow to lookup
1238  *      @final_dst: final destination address for ipsec lookup
1239  *      @connected: whether @sk is connected or not
1240  *
1241  *      This function performs a route lookup on the given flow with the
1242  *      possibility of using the cached route in the socket if it is valid.
1243  *      It will take the socket dst lock when operating on the dst cache.
1244  *      As a result, this function can only be used in process context.
1245  *
1246  *      In addition, for a connected socket, cache the dst in the socket
1247  *      if the current cache is not valid.
1248  *
1249  *      It returns a valid dst pointer on success, or a pointer encoded
1250  *      error code.
1251  */
1252 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1253                                          const struct in6_addr *final_dst,
1254                                          bool connected)
1255 {
1256         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1257
1258         dst = ip6_sk_dst_check(sk, dst, fl6);
1259         if (dst)
1260                 return dst;
1261
1262         dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1263         if (connected && !IS_ERR(dst))
1264                 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1265
1266         return dst;
1267 }
1268 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1269
1270 /**
1271  *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
1272  *      @skb: Packet for which lookup is done
1273  *      @dev: Tunnel device
1274  *      @net: Network namespace of tunnel device
1275  *      @sock: Socket which provides route info
1276  *      @saddr: Memory to store the src ip address
1277  *      @info: Tunnel information
1278  *      @protocol: IP protocol
1279  *      @use_cache: Flag to enable cache usage
1280  *      This function performs a route lookup on a tunnel
1281  *
1282  *      It returns a valid dst pointer and stores src address to be used in
1283  *      tunnel in param saddr on success, else a pointer encoded error code.
1284  */
1285
1286 struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
1287                                         struct net_device *dev,
1288                                         struct net *net,
1289                                         struct socket *sock,
1290                                         struct in6_addr *saddr,
1291                                         const struct ip_tunnel_info *info,
1292                                         u8 protocol,
1293                                         bool use_cache)
1294 {
1295         struct dst_entry *dst = NULL;
1296 #ifdef CONFIG_DST_CACHE
1297         struct dst_cache *dst_cache;
1298 #endif
1299         struct flowi6 fl6;
1300         __u8 prio;
1301
1302 #ifdef CONFIG_DST_CACHE
1303         dst_cache = (struct dst_cache *)&info->dst_cache;
1304         if (use_cache) {
1305                 dst = dst_cache_get_ip6(dst_cache, saddr);
1306                 if (dst)
1307                         return dst;
1308         }
1309 #endif
1310         memset(&fl6, 0, sizeof(fl6));
1311         fl6.flowi6_mark = skb->mark;
1312         fl6.flowi6_proto = protocol;
1313         fl6.daddr = info->key.u.ipv6.dst;
1314         fl6.saddr = info->key.u.ipv6.src;
1315         prio = info->key.tos;
1316         fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
1317                                           info->key.label);
1318
1319         dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
1320                                               NULL);
1321         if (IS_ERR(dst)) {
1322                 netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
1323                 return ERR_PTR(-ENETUNREACH);
1324         }
1325         if (dst->dev == dev) { /* is this necessary? */
1326                 netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
1327                 dst_release(dst);
1328                 return ERR_PTR(-ELOOP);
1329         }
1330 #ifdef CONFIG_DST_CACHE
1331         if (use_cache)
1332                 dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
1333 #endif
1334         *saddr = fl6.saddr;
1335         return dst;
1336 }
1337 EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1338
1339 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1340                                                gfp_t gfp)
1341 {
1342         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1343 }
1344
1345 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1346                                                 gfp_t gfp)
1347 {
1348         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1349 }
1350
1351 static void ip6_append_data_mtu(unsigned int *mtu,
1352                                 int *maxfraglen,
1353                                 unsigned int fragheaderlen,
1354                                 struct sk_buff *skb,
1355                                 struct rt6_info *rt,
1356                                 unsigned int orig_mtu)
1357 {
1358         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1359                 if (!skb) {
1360                         /* first fragment, reserve header_len */
1361                         *mtu = orig_mtu - rt->dst.header_len;
1362
1363                 } else {
1364                         /*
1365                          * this fragment is not first, the headers
1366                          * space is regarded as data space.
1367                          */
1368                         *mtu = orig_mtu;
1369                 }
1370                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1371                               + fragheaderlen - sizeof(struct frag_hdr);
1372         }
1373 }
1374
1375 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1376                           struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
1377                           struct rt6_info *rt, struct flowi6 *fl6)
1378 {
1379         struct ipv6_pinfo *np = inet6_sk(sk);
1380         unsigned int mtu;
1381         struct ipv6_txoptions *opt = ipc6->opt;
1382
1383         /*
1384          * setup for corking
1385          */
1386         if (opt) {
1387                 if (WARN_ON(v6_cork->opt))
1388                         return -EINVAL;
1389
1390                 v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
1391                 if (unlikely(!v6_cork->opt))
1392                         return -ENOBUFS;
1393
1394                 v6_cork->opt->tot_len = sizeof(*opt);
1395                 v6_cork->opt->opt_flen = opt->opt_flen;
1396                 v6_cork->opt->opt_nflen = opt->opt_nflen;
1397
1398                 v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1399                                                     sk->sk_allocation);
1400                 if (opt->dst0opt && !v6_cork->opt->dst0opt)
1401                         return -ENOBUFS;
1402
1403                 v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1404                                                     sk->sk_allocation);
1405                 if (opt->dst1opt && !v6_cork->opt->dst1opt)
1406                         return -ENOBUFS;
1407
1408                 v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
1409                                                    sk->sk_allocation);
1410                 if (opt->hopopt && !v6_cork->opt->hopopt)
1411                         return -ENOBUFS;
1412
1413                 v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1414                                                     sk->sk_allocation);
1415                 if (opt->srcrt && !v6_cork->opt->srcrt)
1416                         return -ENOBUFS;
1417
1418                 /* need source address above miyazawa*/
1419         }
1420         dst_hold(&rt->dst);
1421         cork->base.dst = &rt->dst;
1422         cork->fl.u.ip6 = *fl6;
1423         v6_cork->hop_limit = ipc6->hlimit;
1424         v6_cork->tclass = ipc6->tclass;
1425         if (rt->dst.flags & DST_XFRM_TUNNEL)
1426                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1427                       READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1428         else
1429                 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1430                         READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
1431         if (np->frag_size < mtu) {
1432                 if (np->frag_size)
1433                         mtu = np->frag_size;
1434         }
1435         if (mtu < IPV6_MIN_MTU)
1436                 return -EINVAL;
1437         cork->base.fragsize = mtu;
1438         cork->base.gso_size = ipc6->gso_size;
1439         cork->base.tx_flags = 0;
1440         cork->base.mark = ipc6->sockc.mark;
1441         sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
1442
1443         if (dst_allfrag(xfrm_dst_path(&rt->dst)))
1444                 cork->base.flags |= IPCORK_ALLFRAG;
1445         cork->base.length = 0;
1446
1447         cork->base.transmit_time = ipc6->sockc.transmit_time;
1448
1449         return 0;
1450 }
1451
1452 static int __ip6_append_data(struct sock *sk,
1453                              struct flowi6 *fl6,
1454                              struct sk_buff_head *queue,
1455                              struct inet_cork *cork,
1456                              struct inet6_cork *v6_cork,
1457                              struct page_frag *pfrag,
1458                              int getfrag(void *from, char *to, int offset,
1459                                          int len, int odd, struct sk_buff *skb),
1460                              void *from, int length, int transhdrlen,
1461                              unsigned int flags, struct ipcm6_cookie *ipc6)
1462 {
1463         struct sk_buff *skb, *skb_prev = NULL;
1464         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1465         struct ubuf_info *uarg = NULL;
1466         int exthdrlen = 0;
1467         int dst_exthdrlen = 0;
1468         int hh_len;
1469         int copy;
1470         int err;
1471         int offset = 0;
1472         u32 tskey = 0;
1473         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1474         struct ipv6_txoptions *opt = v6_cork->opt;
1475         int csummode = CHECKSUM_NONE;
1476         unsigned int maxnonfragsize, headersize;
1477         unsigned int wmem_alloc_delta = 0;
1478         bool paged, extra_uref = false;
1479
1480         skb = skb_peek_tail(queue);
1481         if (!skb) {
1482                 exthdrlen = opt ? opt->opt_flen : 0;
1483                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1484         }
1485
1486         paged = !!cork->gso_size;
1487         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1488         orig_mtu = mtu;
1489
1490         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1491             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1492                 tskey = sk->sk_tskey++;
1493
1494         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1495
1496         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1497                         (opt ? opt->opt_nflen : 0);
1498         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1499                      sizeof(struct frag_hdr);
1500
1501         headersize = sizeof(struct ipv6hdr) +
1502                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1503                      (dst_allfrag(&rt->dst) ?
1504                       sizeof(struct frag_hdr) : 0) +
1505                      rt->rt6i_nfheader_len;
1506
1507         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1508          * the first fragment
1509          */
1510         if (headersize + transhdrlen > mtu)
1511                 goto emsgsize;
1512
1513         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1514             (sk->sk_protocol == IPPROTO_UDP ||
1515              sk->sk_protocol == IPPROTO_RAW)) {
1516                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1517                                 sizeof(struct ipv6hdr));
1518                 goto emsgsize;
1519         }
1520
1521         if (ip6_sk_ignore_df(sk))
1522                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1523         else
1524                 maxnonfragsize = mtu;
1525
1526         if (cork->length + length > maxnonfragsize - headersize) {
1527 emsgsize:
1528                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1529                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1530                 return -EMSGSIZE;
1531         }
1532
1533         /* CHECKSUM_PARTIAL only with no extension headers and when
1534          * we are not going to fragment
1535          */
1536         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1537             headersize == sizeof(struct ipv6hdr) &&
1538             length <= mtu - headersize &&
1539             (!(flags & MSG_MORE) || cork->gso_size) &&
1540             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1541                 csummode = CHECKSUM_PARTIAL;
1542
1543         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1544                 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
1545                 if (!uarg)
1546                         return -ENOBUFS;
1547                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1548                 if (rt->dst.dev->features & NETIF_F_SG &&
1549                     csummode == CHECKSUM_PARTIAL) {
1550                         paged = true;
1551                 } else {
1552                         uarg->zerocopy = 0;
1553                         skb_zcopy_set(skb, uarg, &extra_uref);
1554                 }
1555         }
1556
1557         /*
1558          * Let's try using as much space as possible.
1559          * Use MTU if total length of the message fits into the MTU.
1560          * Otherwise, we need to reserve fragment header and
1561          * fragment alignment (= 8-15 octects, in total).
1562          *
1563          * Note that we may need to "move" the data from the tail
1564          * of the buffer to the new fragment when we split
1565          * the message.
1566          *
1567          * FIXME: It may be fragmented into multiple chunks
1568          *        at once if non-fragmentable extension headers
1569          *        are too large.
1570          * --yoshfuji
1571          */
1572
1573         cork->length += length;
1574         if (!skb)
1575                 goto alloc_new_skb;
1576
1577         while (length > 0) {
1578                 /* Check if the remaining data fits into current packet. */
1579                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1580                 if (copy < length)
1581                         copy = maxfraglen - skb->len;
1582
1583                 if (copy <= 0) {
1584                         char *data;
1585                         unsigned int datalen;
1586                         unsigned int fraglen;
1587                         unsigned int fraggap;
1588                         unsigned int alloclen, alloc_extra;
1589                         unsigned int pagedlen;
1590 alloc_new_skb:
1591                         /* There's no room in the current skb */
1592                         if (skb)
1593                                 fraggap = skb->len - maxfraglen;
1594                         else
1595                                 fraggap = 0;
1596                         /* update mtu and maxfraglen if necessary */
1597                         if (!skb || !skb_prev)
1598                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1599                                                     fragheaderlen, skb, rt,
1600                                                     orig_mtu);
1601
1602                         skb_prev = skb;
1603
1604                         /*
1605                          * If remaining data exceeds the mtu,
1606                          * we know we need more fragment(s).
1607                          */
1608                         datalen = length + fraggap;
1609
1610                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1611                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1612                         fraglen = datalen + fragheaderlen;
1613                         pagedlen = 0;
1614
1615                         alloc_extra = hh_len;
1616                         alloc_extra += dst_exthdrlen;
1617                         alloc_extra += rt->dst.trailer_len;
1618
1619                         /* We just reserve space for fragment header.
1620                          * Note: this may be overallocation if the message
1621                          * (without MSG_MORE) fits into the MTU.
1622                          */
1623                         alloc_extra += sizeof(struct frag_hdr);
1624
1625                         if ((flags & MSG_MORE) &&
1626                             !(rt->dst.dev->features&NETIF_F_SG))
1627                                 alloclen = mtu;
1628                         else if (!paged &&
1629                                  (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1630                                   !(rt->dst.dev->features & NETIF_F_SG)))
1631                                 alloclen = fraglen;
1632                         else {
1633                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1634                                 pagedlen = fraglen - alloclen;
1635                         }
1636                         alloclen += alloc_extra;
1637
1638                         if (datalen != length + fraggap) {
1639                                 /*
1640                                  * this is not the last fragment, the trailer
1641                                  * space is regarded as data space.
1642                                  */
1643                                 datalen += rt->dst.trailer_len;
1644                         }
1645
1646                         fraglen = datalen + fragheaderlen;
1647
1648                         copy = datalen - transhdrlen - fraggap - pagedlen;
1649                         if (copy < 0) {
1650                                 err = -EINVAL;
1651                                 goto error;
1652                         }
1653                         if (transhdrlen) {
1654                                 skb = sock_alloc_send_skb(sk, alloclen,
1655                                                 (flags & MSG_DONTWAIT), &err);
1656                         } else {
1657                                 skb = NULL;
1658                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1659                                     2 * sk->sk_sndbuf)
1660                                         skb = alloc_skb(alloclen,
1661                                                         sk->sk_allocation);
1662                                 if (unlikely(!skb))
1663                                         err = -ENOBUFS;
1664                         }
1665                         if (!skb)
1666                                 goto error;
1667                         /*
1668                          *      Fill in the control structures
1669                          */
1670                         skb->protocol = htons(ETH_P_IPV6);
1671                         skb->ip_summed = csummode;
1672                         skb->csum = 0;
1673                         /* reserve for fragmentation and ipsec header */
1674                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1675                                     dst_exthdrlen);
1676
1677                         /*
1678                          *      Find where to start putting bytes
1679                          */
1680                         data = skb_put(skb, fraglen - pagedlen);
1681                         skb_set_network_header(skb, exthdrlen);
1682                         data += fragheaderlen;
1683                         skb->transport_header = (skb->network_header +
1684                                                  fragheaderlen);
1685                         if (fraggap) {
1686                                 skb->csum = skb_copy_and_csum_bits(
1687                                         skb_prev, maxfraglen,
1688                                         data + transhdrlen, fraggap);
1689                                 skb_prev->csum = csum_sub(skb_prev->csum,
1690                                                           skb->csum);
1691                                 data += fraggap;
1692                                 pskb_trim_unique(skb_prev, maxfraglen);
1693                         }
1694                         if (copy > 0 &&
1695                             getfrag(from, data + transhdrlen, offset,
1696                                     copy, fraggap, skb) < 0) {
1697                                 err = -EFAULT;
1698                                 kfree_skb(skb);
1699                                 goto error;
1700                         }
1701
1702                         offset += copy;
1703                         length -= copy + transhdrlen;
1704                         transhdrlen = 0;
1705                         exthdrlen = 0;
1706                         dst_exthdrlen = 0;
1707
1708                         /* Only the initial fragment is time stamped */
1709                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1710                         cork->tx_flags = 0;
1711                         skb_shinfo(skb)->tskey = tskey;
1712                         tskey = 0;
1713                         skb_zcopy_set(skb, uarg, &extra_uref);
1714
1715                         if ((flags & MSG_CONFIRM) && !skb_prev)
1716                                 skb_set_dst_pending_confirm(skb, 1);
1717
1718                         /*
1719                          * Put the packet on the pending queue
1720                          */
1721                         if (!skb->destructor) {
1722                                 skb->destructor = sock_wfree;
1723                                 skb->sk = sk;
1724                                 wmem_alloc_delta += skb->truesize;
1725                         }
1726                         __skb_queue_tail(queue, skb);
1727                         continue;
1728                 }
1729
1730                 if (copy > length)
1731                         copy = length;
1732
1733                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1734                     skb_tailroom(skb) >= copy) {
1735                         unsigned int off;
1736
1737                         off = skb->len;
1738                         if (getfrag(from, skb_put(skb, copy),
1739                                                 offset, copy, off, skb) < 0) {
1740                                 __skb_trim(skb, off);
1741                                 err = -EFAULT;
1742                                 goto error;
1743                         }
1744                 } else if (!uarg || !uarg->zerocopy) {
1745                         int i = skb_shinfo(skb)->nr_frags;
1746
1747                         err = -ENOMEM;
1748                         if (!sk_page_frag_refill(sk, pfrag))
1749                                 goto error;
1750
1751                         if (!skb_can_coalesce(skb, i, pfrag->page,
1752                                               pfrag->offset)) {
1753                                 err = -EMSGSIZE;
1754                                 if (i == MAX_SKB_FRAGS)
1755                                         goto error;
1756
1757                                 __skb_fill_page_desc(skb, i, pfrag->page,
1758                                                      pfrag->offset, 0);
1759                                 skb_shinfo(skb)->nr_frags = ++i;
1760                                 get_page(pfrag->page);
1761                         }
1762                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1763                         if (getfrag(from,
1764                                     page_address(pfrag->page) + pfrag->offset,
1765                                     offset, copy, skb->len, skb) < 0)
1766                                 goto error_efault;
1767
1768                         pfrag->offset += copy;
1769                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1770                         skb->len += copy;
1771                         skb->data_len += copy;
1772                         skb->truesize += copy;
1773                         wmem_alloc_delta += copy;
1774                 } else {
1775                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1776                         if (err < 0)
1777                                 goto error;
1778                 }
1779                 offset += copy;
1780                 length -= copy;
1781         }
1782
1783         if (wmem_alloc_delta)
1784                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1785         return 0;
1786
1787 error_efault:
1788         err = -EFAULT;
1789 error:
1790         net_zcopy_put_abort(uarg, extra_uref);
1791         cork->length -= length;
1792         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1793         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1794         return err;
1795 }
1796
1797 int ip6_append_data(struct sock *sk,
1798                     int getfrag(void *from, char *to, int offset, int len,
1799                                 int odd, struct sk_buff *skb),
1800                     void *from, int length, int transhdrlen,
1801                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1802                     struct rt6_info *rt, unsigned int flags)
1803 {
1804         struct inet_sock *inet = inet_sk(sk);
1805         struct ipv6_pinfo *np = inet6_sk(sk);
1806         int exthdrlen;
1807         int err;
1808
1809         if (flags&MSG_PROBE)
1810                 return 0;
1811         if (skb_queue_empty(&sk->sk_write_queue)) {
1812                 /*
1813                  * setup for corking
1814                  */
1815                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1816                                      ipc6, rt, fl6);
1817                 if (err)
1818                         return err;
1819
1820                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1821                 length += exthdrlen;
1822                 transhdrlen += exthdrlen;
1823         } else {
1824                 fl6 = &inet->cork.fl.u.ip6;
1825                 transhdrlen = 0;
1826         }
1827
1828         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1829                                  &np->cork, sk_page_frag(sk), getfrag,
1830                                  from, length, transhdrlen, flags, ipc6);
1831 }
1832 EXPORT_SYMBOL_GPL(ip6_append_data);
1833
1834 static void ip6_cork_release(struct inet_cork_full *cork,
1835                              struct inet6_cork *v6_cork)
1836 {
1837         if (v6_cork->opt) {
1838                 kfree(v6_cork->opt->dst0opt);
1839                 kfree(v6_cork->opt->dst1opt);
1840                 kfree(v6_cork->opt->hopopt);
1841                 kfree(v6_cork->opt->srcrt);
1842                 kfree(v6_cork->opt);
1843                 v6_cork->opt = NULL;
1844         }
1845
1846         if (cork->base.dst) {
1847                 dst_release(cork->base.dst);
1848                 cork->base.dst = NULL;
1849                 cork->base.flags &= ~IPCORK_ALLFRAG;
1850         }
1851         memset(&cork->fl, 0, sizeof(cork->fl));
1852 }
1853
1854 struct sk_buff *__ip6_make_skb(struct sock *sk,
1855                                struct sk_buff_head *queue,
1856                                struct inet_cork_full *cork,
1857                                struct inet6_cork *v6_cork)
1858 {
1859         struct sk_buff *skb, *tmp_skb;
1860         struct sk_buff **tail_skb;
1861         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1862         struct ipv6_pinfo *np = inet6_sk(sk);
1863         struct net *net = sock_net(sk);
1864         struct ipv6hdr *hdr;
1865         struct ipv6_txoptions *opt = v6_cork->opt;
1866         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1867         struct flowi6 *fl6 = &cork->fl.u.ip6;
1868         unsigned char proto = fl6->flowi6_proto;
1869
1870         skb = __skb_dequeue(queue);
1871         if (!skb)
1872                 goto out;
1873         tail_skb = &(skb_shinfo(skb)->frag_list);
1874
1875         /* move skb->data to ip header from ext header */
1876         if (skb->data < skb_network_header(skb))
1877                 __skb_pull(skb, skb_network_offset(skb));
1878         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1879                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1880                 *tail_skb = tmp_skb;
1881                 tail_skb = &(tmp_skb->next);
1882                 skb->len += tmp_skb->len;
1883                 skb->data_len += tmp_skb->len;
1884                 skb->truesize += tmp_skb->truesize;
1885                 tmp_skb->destructor = NULL;
1886                 tmp_skb->sk = NULL;
1887         }
1888
1889         /* Allow local fragmentation. */
1890         skb->ignore_df = ip6_sk_ignore_df(sk);
1891
1892         *final_dst = fl6->daddr;
1893         __skb_pull(skb, skb_network_header_len(skb));
1894         if (opt && opt->opt_flen)
1895                 ipv6_push_frag_opts(skb, opt, &proto);
1896         if (opt && opt->opt_nflen)
1897                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1898
1899         skb_push(skb, sizeof(struct ipv6hdr));
1900         skb_reset_network_header(skb);
1901         hdr = ipv6_hdr(skb);
1902
1903         ip6_flow_hdr(hdr, v6_cork->tclass,
1904                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1905                                         ip6_autoflowlabel(net, np), fl6));
1906         hdr->hop_limit = v6_cork->hop_limit;
1907         hdr->nexthdr = proto;
1908         hdr->saddr = fl6->saddr;
1909         hdr->daddr = *final_dst;
1910
1911         skb->priority = sk->sk_priority;
1912         skb->mark = cork->base.mark;
1913
1914         skb->tstamp = cork->base.transmit_time;
1915
1916         skb_dst_set(skb, dst_clone(&rt->dst));
1917         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1918         if (proto == IPPROTO_ICMPV6) {
1919                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1920
1921                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1922                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1923         }
1924
1925         ip6_cork_release(cork, v6_cork);
1926 out:
1927         return skb;
1928 }
1929
1930 int ip6_send_skb(struct sk_buff *skb)
1931 {
1932         struct net *net = sock_net(skb->sk);
1933         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1934         int err;
1935
1936         err = ip6_local_out(net, skb->sk, skb);
1937         if (err) {
1938                 if (err > 0)
1939                         err = net_xmit_errno(err);
1940                 if (err)
1941                         IP6_INC_STATS(net, rt->rt6i_idev,
1942                                       IPSTATS_MIB_OUTDISCARDS);
1943         }
1944
1945         return err;
1946 }
1947
/*
 * ip6_push_pending_frames - transmit whatever is pending on @sk
 *
 * Obtains the finished packet from ip6_finish_skb() and passes it to
 * ip6_send_skb().  Returns 0 when there was nothing to send, otherwise
 * the result of ip6_send_skb().
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *pending = ip6_finish_skb(sk);

	return pending ? ip6_send_skb(pending) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1959
1960 static void __ip6_flush_pending_frames(struct sock *sk,
1961                                        struct sk_buff_head *queue,
1962                                        struct inet_cork_full *cork,
1963                                        struct inet6_cork *v6_cork)
1964 {
1965         struct sk_buff *skb;
1966
1967         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1968                 if (skb_dst(skb))
1969                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1970                                       IPSTATS_MIB_OUTDISCARDS);
1971                 kfree_skb(skb);
1972         }
1973
1974         ip6_cork_release(cork, v6_cork);
1975 }
1976
1977 void ip6_flush_pending_frames(struct sock *sk)
1978 {
1979         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1980                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1981 }
1982 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1983
1984 struct sk_buff *ip6_make_skb(struct sock *sk,
1985                              int getfrag(void *from, char *to, int offset,
1986                                          int len, int odd, struct sk_buff *skb),
1987                              void *from, int length, int transhdrlen,
1988                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1989                              struct rt6_info *rt, unsigned int flags,
1990                              struct inet_cork_full *cork)
1991 {
1992         struct inet6_cork v6_cork;
1993         struct sk_buff_head queue;
1994         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1995         int err;
1996
1997         if (flags & MSG_PROBE)
1998                 return NULL;
1999
2000         __skb_queue_head_init(&queue);
2001
2002         cork->base.flags = 0;
2003         cork->base.addr = 0;
2004         cork->base.opt = NULL;
2005         cork->base.dst = NULL;
2006         v6_cork.opt = NULL;
2007         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
2008         if (err) {
2009                 ip6_cork_release(cork, &v6_cork);
2010                 return ERR_PTR(err);
2011         }
2012         if (ipc6->dontfrag < 0)
2013                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
2014
2015         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
2016                                 &current->task_frag, getfrag, from,
2017                                 length + exthdrlen, transhdrlen + exthdrlen,
2018                                 flags, ipc6);
2019         if (err) {
2020                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
2021                 return ERR_PTR(err);
2022         }
2023
2024         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
2025 }