GNU Linux-libre 4.19.207-gnu1
[releases.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <linux/jhash.h>
48 #include <net/net_namespace.h>
49 #include <net/snmp.h>
50 #include <net/ipv6.h>
51 #include <net/ip6_fib.h>
52 #include <net/ip6_route.h>
53 #include <net/ndisc.h>
54 #include <net/addrconf.h>
55 #include <net/tcp.h>
56 #include <linux/rtnetlink.h>
57 #include <net/dst.h>
58 #include <net/dst_metadata.h>
59 #include <net/xfrm.h>
60 #include <net/netevent.h>
61 #include <net/netlink.h>
62 #include <net/nexthop.h>
63 #include <net/lwtunnel.h>
64 #include <net/ip_tunnels.h>
65 #include <net/l3mdev.h>
66 #include <net/ip.h>
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
/* Prototype only; the definition is not within this part of the file. */
73 static int ip6_rt_type_to_error(u8 fib6_type);
74
/*
 * CREATE_TRACE_POINTS is defined only for the duration of this include and
 * is undefined again right after the tracepoint symbol is exported.
 */
75 #define CREATE_TRACE_POINTS
76 #include <trace/events/fib6.h>
77 EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78 #undef CREATE_TRACE_POINTS
79
/*
 * Result codes of the route scoring helpers (rt6_check_neigh(),
 * rt6_score_route()).  Every failure code is negative, so callers can
 * test "< 0".
 *
 *   RT6_NUD_FAIL_HARD  - find_match() skips the route.
 *   RT6_NUD_FAIL_PROBE - returned by rt6_check_neigh() for a NUD_FAILED
 *                        neighbour when CONFIG_IPV6_ROUTER_PREF is set.
 *   RT6_NUD_FAIL_DO_RR - find_match() scores the route 0 and asks the
 *                        caller to round-robin.
 *   RT6_NUD_SUCCEED    - the route is usable.
 */
80 enum rt6_nud_state {
81         RT6_NUD_FAIL_HARD = -3,
82         RT6_NUD_FAIL_PROBE = -2,
83         RT6_NUD_FAIL_DO_RR = -1,
84         RT6_NUD_SUCCEED = 1
85 };
86
/*
 * Forward declarations.  The dst callbacks below are referenced by the
 * dst_ops tables and route templates that are defined before the
 * functions themselves.
 */
87 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
88 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
89 static unsigned int      ip6_mtu(const struct dst_entry *dst);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(struct dst_ops *ops);
95
/* Input/output handlers installed in the null and prohibit route templates. */
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
98 static int              ip6_pkt_prohibit(struct sk_buff *skb);
99 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
100 static void             ip6_link_failure(struct sk_buff *skb);
101 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102                                            struct sk_buff *skb, u32 mtu,
103                                            bool confirm_neigh);
104 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
105                                         struct sk_buff *skb);
106 static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
107 static size_t rt6_nlmsg_size(struct fib6_info *rt);
108 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
109                          struct fib6_info *rt, struct dst_entry *dst,
110                          struct in6_addr *dest, struct in6_addr *src,
111                          int iif, int type, u32 portid, u32 seq,
112                          unsigned int flags);
113 static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
114                                            const struct in6_addr *daddr,
115                                            const struct in6_addr *saddr);
116
/* Route-information helpers, used by rt6_route_rcv() under the same option. */
117 #ifdef CONFIG_IPV6_ROUTE_INFO
118 static struct fib6_info *rt6_add_route_info(struct net *net,
119                                            const struct in6_addr *prefix, int prefixlen,
120                                            const struct in6_addr *gwaddr,
121                                            struct net_device *dev,
122                                            unsigned int pref);
123 static struct fib6_info *rt6_get_route_info(struct net *net,
124                                            const struct in6_addr *prefix, int prefixlen,
125                                            const struct in6_addr *gwaddr,
126                                            struct net_device *dev);
127 #endif
128
/*
 * A list of rt6_info entries (linked through rt6i_uncached) together with
 * the spinlock that guards it.
 */
129 struct uncached_list {
130         spinlock_t              lock;
131         struct list_head        head;
132 };
133
/* One uncached list per CPU. */
134 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
135
136 void rt6_uncached_list_add(struct rt6_info *rt)
137 {
138         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
139
140         rt->rt6i_uncached_list = ul;
141
142         spin_lock_bh(&ul->lock);
143         list_add_tail(&rt->rt6i_uncached, &ul->head);
144         spin_unlock_bh(&ul->lock);
145 }
146
147 void rt6_uncached_list_del(struct rt6_info *rt)
148 {
149         if (!list_empty(&rt->rt6i_uncached)) {
150                 struct uncached_list *ul = rt->rt6i_uncached_list;
151                 struct net *net = dev_net(rt->dst.dev);
152
153                 spin_lock_bh(&ul->lock);
154                 list_del(&rt->rt6i_uncached);
155                 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
156                 spin_unlock_bh(&ul->lock);
157         }
158 }
159
/*
 * Walk the uncached list of every possible CPU and re-point each entry that
 * still references @dev -- through rt6i_idev or through dst.dev -- at the
 * namespace's loopback device.  Nothing is done when @dev is itself the
 * loopback device.
 */
160 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
161 {
162         struct net_device *loopback_dev = net->loopback_dev;
163         int cpu;
164
165         if (dev == loopback_dev)
166                 return;
167
168         for_each_possible_cpu(cpu) {
169                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
170                 struct rt6_info *rt;
171
172                 spin_lock_bh(&ul->lock);
173                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
174                         struct inet6_dev *rt_idev = rt->rt6i_idev;
175                         struct net_device *rt_dev = rt->dst.dev;
176
                        /* Swap the inet6_dev reference: take the new one, then drop the old. */
177                         if (rt_idev->dev == dev) {
178                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
179                                 in6_dev_put(rt_idev);
180                         }
181
                        /* Same hand-over for the net_device reference held by the dst. */
182                         if (rt_dev == dev) {
183                                 rt->dst.dev = loopback_dev;
184                                 dev_hold(rt->dst.dev);
185                                 dev_put(rt_dev);
186                         }
187                 }
188                 spin_unlock_bh(&ul->lock);
189         }
190 }
191
192 static inline const void *choose_neigh_daddr(const struct in6_addr *p,
193                                              struct sk_buff *skb,
194                                              const void *daddr)
195 {
196         if (!ipv6_addr_any(p))
197                 return (const void *) p;
198         else if (skb)
199                 return &ipv6_hdr(skb)->daddr;
200         return daddr;
201 }
202
203 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204                                    struct net_device *dev,
205                                    struct sk_buff *skb,
206                                    const void *daddr)
207 {
208         struct neighbour *n;
209
210         daddr = choose_neigh_daddr(gw, skb, daddr);
211         n = __ipv6_neigh_lookup(dev, daddr);
212         if (n)
213                 return n;
214
215         n = neigh_create(&nd_tbl, daddr, dev);
216         return IS_ERR(n) ? NULL : n;
217 }
218
219 static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220                                               struct sk_buff *skb,
221                                               const void *daddr)
222 {
223         const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224
225         return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
226 }
227
228 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
229 {
230         struct net_device *dev = dst->dev;
231         struct rt6_info *rt = (struct rt6_info *)dst;
232
233         daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
234         if (!daddr)
235                 return;
236         if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
237                 return;
238         if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
239                 return;
240         __ipv6_confirm_neigh(dev, daddr);
241 }
242
/*
 * dst_ops for regular IPv6 routes: binds the dst callbacks declared or
 * defined above to the AF_INET6 family.
 * NOTE(review): the name suggests this is copied into each namespace's
 * net->ipv6.ip6_dst_ops (the ops ip6_dst_alloc() uses) -- confirm at the
 * per-net init code, which is outside this section.
 */
243 static struct dst_ops ip6_dst_ops_template = {
244         .family                 =       AF_INET6,
245         .gc                     =       ip6_dst_gc,
246         .gc_thresh              =       1024,
247         .check                  =       ip6_dst_check,
248         .default_advmss         =       ip6_default_advmss,
249         .mtu                    =       ip6_mtu,
250         .cow_metrics            =       dst_cow_metrics_generic,
251         .destroy                =       ip6_dst_destroy,
252         .ifdown                 =       ip6_dst_ifdown,
253         .negative_advice        =       ip6_negative_advice,
254         .link_failure           =       ip6_link_failure,
255         .update_pmtu            =       ip6_rt_update_pmtu,
256         .redirect               =       rt6_do_redirect,
257         .local_out              =       __ip6_local_out,
258         .neigh_lookup           =       ip6_dst_neigh_lookup,
259         .confirm_neigh          =       ip6_confirm_neigh,
260 };
261
262 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
263 {
264         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265
266         return mtu ? : dst->dev->mtu;
267 }
268
/* dst_ops->update_pmtu callback for blackhole dsts: intentionally does nothing. */
269 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
270                                          struct sk_buff *skb, u32 mtu,
271                                          bool confirm_neigh)
272 {
273 }
274
/* dst_ops->redirect callback for blackhole dsts: intentionally does nothing. */
275 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
276                                       struct sk_buff *skb)
277 {
278 }
279
/*
 * dst_ops for blackhole dsts.  Shares destroy/check/advmss/neigh_lookup
 * with the regular IPv6 ops but uses the no-op PMTU and redirect handlers
 * and ip6_blackhole_mtu(); no gc, ifdown or link_failure callbacks are set.
 */
280 static struct dst_ops ip6_dst_blackhole_ops = {
281         .family                 =       AF_INET6,
282         .destroy                =       ip6_dst_destroy,
283         .check                  =       ip6_dst_check,
284         .mtu                    =       ip6_blackhole_mtu,
285         .default_advmss         =       ip6_default_advmss,
286         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
287         .redirect               =       ip6_rt_blackhole_redirect,
288         .cow_metrics            =       dst_cow_metrics_generic,
289         .neigh_lookup           =       ip6_dst_neigh_lookup,
290 };
291
/*
 * Metrics array whose hop-limit slot is spelled out as 0; every other slot
 * is implicitly zero-initialised as well.
 */
292 static const u32 ip6_template_metrics[RTAX_MAX] = {
293         [RTAX_HOPLIMIT - 1] = 0,
294 };
295
/*
 * Initial contents of the FIB "null entry": a kernel-owned reject route of
 * type RTN_UNREACHABLE with no nexthop, the largest possible metric, one
 * reference, and the shared default metrics.
 */
296 static const struct fib6_info fib6_null_entry_template = {
297         .fib6_flags     = (RTF_REJECT | RTF_NONEXTHOP),
298         .fib6_protocol  = RTPROT_KERNEL,
299         .fib6_metric    = ~(u32)0,
300         .fib6_ref       = ATOMIC_INIT(1),
301         .fib6_type      = RTN_UNREACHABLE,
302         .fib6_metrics   = (struct dst_metrics *)&dst_default_metrics,
303 };
304
/*
 * Initial contents of the dst-level null route: packets are handed to
 * ip6_pkt_discard()/ip6_pkt_discard_out() and the dst carries -ENETUNREACH.
 */
305 static const struct rt6_info ip6_null_entry_template = {
306         .dst = {
307                 .__refcnt       = ATOMIC_INIT(1),
308                 .__use          = 1,
309                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
310                 .error          = -ENETUNREACH,
311                 .input          = ip6_pkt_discard,
312                 .output         = ip6_pkt_discard_out,
313         },
314         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
315 };
316
/* The two templates below exist only when multiple routing tables are built in. */
317 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
318
/*
 * "Prohibit" route: packets go to ip6_pkt_prohibit()/ip6_pkt_prohibit_out()
 * and the dst carries -EACCES.
 */
319 static const struct rt6_info ip6_prohibit_entry_template = {
320         .dst = {
321                 .__refcnt       = ATOMIC_INIT(1),
322                 .__use          = 1,
323                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
324                 .error          = -EACCES,
325                 .input          = ip6_pkt_prohibit,
326                 .output         = ip6_pkt_prohibit_out,
327         },
328         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
329 };
330
/*
 * "Blackhole" route: packets go to the generic dst_discard()/
 * dst_discard_out() handlers and the dst carries -EINVAL.
 */
331 static const struct rt6_info ip6_blk_hole_entry_template = {
332         .dst = {
333                 .__refcnt       = ATOMIC_INIT(1),
334                 .__use          = 1,
335                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
336                 .error          = -EINVAL,
337                 .input          = dst_discard,
338                 .output         = dst_discard_out,
339         },
340         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
341 };
342
343 #endif
344
345 static void rt6_info_init(struct rt6_info *rt)
346 {
347         struct dst_entry *dst = &rt->dst;
348
349         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
350         INIT_LIST_HEAD(&rt->rt6i_uncached);
351 }
352
353 /* allocate dst with ip6_dst_ops */
354 struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
355                                int flags)
356 {
357         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
358                                         1, DST_OBSOLETE_FORCE_CHK, flags);
359
360         if (rt) {
361                 rt6_info_init(rt);
362                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
363         }
364
365         return rt;
366 }
367 EXPORT_SYMBOL(ip6_dst_alloc);
368
/*
 * dst_ops->destroy callback: release everything an rt6_info holds on to --
 * its metrics block, its uncached-list membership, its inet6_dev reference
 * and the fib6_info it was derived from.
 */
369 static void ip6_dst_destroy(struct dst_entry *dst)
370 {
371         struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
372         struct rt6_info *rt = (struct rt6_info *)dst;
373         struct fib6_info *from;
374         struct inet6_dev *idev;
375
        /* The shared default metrics are never freed; others are refcounted. */
376         if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
377                 kfree(p);
378
379         rt6_uncached_list_del(rt);
380
381         idev = rt->rt6i_idev;
382         if (idev) {
383                 rt->rt6i_idev = NULL;
384                 in6_dev_put(idev);
385         }
386
        /* Atomically detach rt->from so the reference is dropped exactly once. */
387         from = xchg((__force struct fib6_info **)&rt->from, NULL);
388         fib6_info_release(from);
389 }
390
391 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
392                            int how)
393 {
394         struct rt6_info *rt = (struct rt6_info *)dst;
395         struct inet6_dev *idev = rt->rt6i_idev;
396         struct net_device *loopback_dev =
397                 dev_net(dev)->loopback_dev;
398
399         if (idev && idev->dev != loopback_dev) {
400                 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
401                 if (loopback_idev) {
402                         rt->rt6i_idev = loopback_idev;
403                         in6_dev_put(idev);
404                 }
405         }
406 }
407
408 static bool __rt6_check_expired(const struct rt6_info *rt)
409 {
410         if (rt->rt6i_flags & RTF_EXPIRES)
411                 return time_after(jiffies, rt->dst.expires);
412         else
413                 return false;
414 }
415
416 static bool rt6_check_expired(const struct rt6_info *rt)
417 {
418         struct fib6_info *from;
419
420         from = rcu_dereference(rt->from);
421
422         if (rt->rt6i_flags & RTF_EXPIRES) {
423                 if (time_after(jiffies, rt->dst.expires))
424                         return true;
425         } else if (from) {
426                 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
427                         fib6_check_expired(from);
428         }
429         return false;
430 }
431
/*
 * Choose one nexthop of a multipath route for the flow @fl6.
 *
 * The flow's multipath hash (computed here if still zero) is compared with
 * the nh_upper_bound of @match and then of each of its siblings; the first
 * entry whose bound is not below the hash is the candidate.  A sibling
 * candidate is only accepted if rt6_score_route() does not report a
 * failure for it; otherwise the original @match is returned.
 */
432 struct fib6_info *fib6_multipath_select(const struct net *net,
433                                         struct fib6_info *match,
434                                         struct flowi6 *fl6, int oif,
435                                         const struct sk_buff *skb,
436                                         int strict)
437 {
438         struct fib6_info *sibling, *next_sibling;
439
440         /* We might have already computed the hash for ICMPv6 errors. In such
441          * case it will always be non-zero. Otherwise now is the time to do it.
442          */
443         if (!fl6->mp_hash)
444                 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
445
446         if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
447                 return match;
448
449         list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
450                                  fib6_siblings) {
451                 int nh_upper_bound;
452
453                 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
454                 if (fl6->mp_hash > nh_upper_bound)
455                         continue;
                /* First sibling covering the hash: take it or give up, never look further. */
456                 if (rt6_score_route(sibling, oif, strict) < 0)
457                         break;
458                 match = sibling;
459                 break;
460         }
461
462         return match;
463 }
464
465 /*
466  *      Route lookup. rcu_read_lock() should be held.
467  */
468
/*
 * Starting at @rt and following fib6_next, find the first live entry that
 * matches the lookup constraints:
 *   - with @oif set, the entry whose nexthop device has that ifindex;
 *   - with @oif == 0, the entry for whose device ipv6_chk_addr() accepts
 *     @saddr.
 * Entries flagged RTNH_F_DEAD are skipped.
 *
 * With no @oif and an unspecified @saddr, a live @rt is returned directly.
 * When nothing matches, the namespace's null entry is returned if an
 * interface match was mandatory (@oif with RT6_LOOKUP_F_IFACE) or if @rt
 * itself is dead; otherwise @rt is returned.
 */
469 static inline struct fib6_info *rt6_device_match(struct net *net,
470                                                  struct fib6_info *rt,
471                                                     const struct in6_addr *saddr,
472                                                     int oif,
473                                                     int flags)
474 {
475         struct fib6_info *sprt;
476
477         if (!oif && ipv6_addr_any(saddr) &&
478             !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
479                 return rt;
480
481         for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
482                 const struct net_device *dev = sprt->fib6_nh.nh_dev;
483
484                 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
485                         continue;
486
487                 if (oif) {
488                         if (dev->ifindex == oif)
489                                 return sprt;
490                 } else {
491                         if (ipv6_chk_addr(net, saddr, dev,
492                                           flags & RT6_LOOKUP_F_IFACE))
493                                 return sprt;
494                 }
495         }
496
497         if (oif && flags & RT6_LOOKUP_F_IFACE)
498                 return net->ipv6.fib6_null_entry;
499
500         return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
501 }
502
503 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe: the work item plus the gateway address to solicit
 * and the device to send on.  rt6_probe() takes a device reference with
 * dev_hold() when queueing; rt6_probe_deferred() releases it.
 */
504 struct __rt6_probe_work {
505         struct work_struct work;
506         struct in6_addr target;
507         struct net_device *dev;
508 };
509
510 static void rt6_probe_deferred(struct work_struct *w)
511 {
512         struct in6_addr mcaddr;
513         struct __rt6_probe_work *work =
514                 container_of(w, struct __rt6_probe_work, work);
515
516         addrconf_addr_solict_mult(&work->target, &mcaddr);
517         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
518         dev_put(work->dev);
519         kfree(work);
520 }
521
522 static void rt6_probe(struct fib6_info *rt)
523 {
524         struct __rt6_probe_work *work = NULL;
525         const struct in6_addr *nh_gw;
526         unsigned long last_probe;
527         struct neighbour *neigh;
528         struct net_device *dev;
529         struct inet6_dev *idev;
530
531         /*
532          * Okay, this does not seem to be appropriate
533          * for now, however, we need to check if it
534          * is really so; aka Router Reachability Probing.
535          *
536          * Router Reachability Probe MUST be rate-limited
537          * to no more than one per minute.
538          */
539         if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
540                 return;
541
542         nh_gw = &rt->fib6_nh.nh_gw;
543         dev = rt->fib6_nh.nh_dev;
544         rcu_read_lock_bh();
545         last_probe = READ_ONCE(rt->last_probe);
546         idev = __in6_dev_get(dev);
547         neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
548         if (neigh) {
549                 if (neigh->nud_state & NUD_VALID)
550                         goto out;
551
552                 write_lock(&neigh->lock);
553                 if (!(neigh->nud_state & NUD_VALID) &&
554                     time_after(jiffies,
555                                neigh->updated + idev->cnf.rtr_probe_interval)) {
556                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
557                         if (work)
558                                 __neigh_set_probe_once(neigh);
559                 }
560                 write_unlock(&neigh->lock);
561         } else if (time_after(jiffies, last_probe +
562                                        idev->cnf.rtr_probe_interval)) {
563                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
564         }
565
566         if (!work || cmpxchg(&rt->last_probe,
567                              last_probe, jiffies) != last_probe) {
568                 kfree(work);
569         } else {
570                 INIT_WORK(&work->work, rt6_probe_deferred);
571                 work->target = *nh_gw;
572                 dev_hold(dev);
573                 work->dev = dev;
574                 schedule_work(&work->work);
575         }
576
577 out:
578         rcu_read_unlock_bh();
579 }
580 #else
/* No-op stand-in used when CONFIG_IPV6_ROUTER_PREF is not set. */
581 static inline void rt6_probe(struct fib6_info *rt)
582 {
583 }
584 #endif
585
586 /*
587  * Default Router Selection (RFC 2461 6.3.6)
588  */
589 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
590 {
591         const struct net_device *dev = rt->fib6_nh.nh_dev;
592
593         if (!oif || dev->ifindex == oif)
594                 return 2;
595         return 0;
596 }
597
/*
 * Judge the reachability of the gateway of @rt.
 *
 * Routes flagged RTF_NONEXTHOP or lacking RTF_GATEWAY always succeed.
 * Otherwise the gateway's neighbour entry is consulted:
 *   - NUD_VALID                        -> RT6_NUD_SUCCEED
 *   - with CONFIG_IPV6_ROUTER_PREF:
 *       any state but NUD_FAILED       -> RT6_NUD_SUCCEED
 *       NUD_FAILED                     -> RT6_NUD_FAIL_PROBE
 *       no neighbour entry             -> RT6_NUD_SUCCEED
 *   - without CONFIG_IPV6_ROUTER_PREF:
 *       entry present but not valid    -> RT6_NUD_FAIL_HARD
 *       no neighbour entry             -> RT6_NUD_FAIL_DO_RR
 */
598 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
599 {
600         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
601         struct neighbour *neigh;
602
603         if (rt->fib6_flags & RTF_NONEXTHOP ||
604             !(rt->fib6_flags & RTF_GATEWAY))
605                 return RT6_NUD_SUCCEED;
606
607         rcu_read_lock_bh();
608         neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
609                                           &rt->fib6_nh.nh_gw);
610         if (neigh) {
611                 read_lock(&neigh->lock);
612                 if (neigh->nud_state & NUD_VALID)
613                         ret = RT6_NUD_SUCCEED;
614 #ifdef CONFIG_IPV6_ROUTER_PREF
615                 else if (!(neigh->nud_state & NUD_FAILED))
616                         ret = RT6_NUD_SUCCEED;
617                 else
618                         ret = RT6_NUD_FAIL_PROBE;
619 #endif
620                 read_unlock(&neigh->lock);
621         } else {
622                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
623                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
624         }
625         rcu_read_unlock_bh();
626
627         return ret;
628 }
629
630 static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
631 {
632         int m;
633
634         m = rt6_check_dev(rt, oif);
635         if (!m && (strict & RT6_LOOKUP_F_IFACE))
636                 return RT6_NUD_FAIL_HARD;
637 #ifdef CONFIG_IPV6_ROUTER_PREF
638         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
639 #endif
640         if (strict & RT6_LOOKUP_F_REACHABLE) {
641                 int n = rt6_check_neigh(rt);
642                 if (n < 0)
643                         return n;
644         }
645         return m;
646 }
647
648 /* called with rc_read_lock held */
649 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
650 {
651         const struct net_device *dev = fib6_info_nh_dev(f6i);
652         bool rc = false;
653
654         if (dev) {
655                 const struct inet6_dev *idev = __in6_dev_get(dev);
656
657                 rc = !!idev->cnf.ignore_routes_with_linkdown;
658         }
659
660         return rc;
661 }
662
/*
 * Compare candidate @rt against the best route found so far.
 *
 * @mpri holds the best score so far and @match the route that achieved it.
 * @rt is disregarded when its nexthop is dead, when it is link-down and
 * link-down routes are to be ignored, when it has expired, or when scoring
 * fails hard.  If @rt scores strictly higher than *@mpri it becomes the new
 * match and *@mpri / *@do_rr are updated.
 *
 * Returns the (possibly unchanged) best match.
 */
663 static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
664                                    int *mpri, struct fib6_info *match,
665                                    bool *do_rr)
666 {
667         int m;
668         bool match_do_rr = false;
669
670         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
671                 goto out;
672
673         if (fib6_ignore_linkdown(rt) &&
674             rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
675             !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
676                 goto out;
677
678         if (fib6_check_expired(rt))
679                 goto out;
680
681         m = rt6_score_route(rt, oif, strict);
682         if (m == RT6_NUD_FAIL_DO_RR) {
683                 match_do_rr = true;
684                 m = 0; /* lowest valid score */
685         } else if (m == RT6_NUD_FAIL_HARD) {
686                 goto out;
687         }
688
        /* Probing is only triggered for lookups that asked for reachability. */
689         if (strict & RT6_LOOKUP_F_REACHABLE)
690                 rt6_probe(rt);
691
692         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
693         if (m > *mpri) {
694                 *do_rr = match_do_rr;
695                 *mpri = m;
696                 match = rt;
697         }
698 out:
699         return match;
700 }
701
/*
 * Find the best route among the entries of a node, scanning in round-robin
 * order.
 *
 * Pass 1 runs from @rr_head to the end of the run of entries with @metric;
 * pass 2 wraps around and runs from @leaf up to @rr_head.  Both passes stop
 * at the first entry with a different metric, which is remembered in cont.
 * Only if neither pass produced a match are the entries from cont onwards
 * (whatever their metric) considered.
 *
 * @fn is not used by the body.  Returns the best match or NULL.
 */
702 static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
703                                      struct fib6_info *leaf,
704                                      struct fib6_info *rr_head,
705                                      u32 metric, int oif, int strict,
706                                      bool *do_rr)
707 {
708         struct fib6_info *rt, *match, *cont;
709         int mpri = -1;
710
711         match = NULL;
712         cont = NULL;
713         for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
714                 if (rt->fib6_metric != metric) {
715                         cont = rt;
716                         break;
717                 }
718
719                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
720         }
721
722         for (rt = leaf; rt && rt != rr_head;
723              rt = rcu_dereference(rt->fib6_next)) {
724                 if (rt->fib6_metric != metric) {
725                         cont = rt;
726                         break;
727                 }
728
729                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730         }
731
732         if (match || !cont)
733                 return match;
734
735         for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
736                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
737
738         return match;
739 }
740
/*
 * Select the route to use from FIB node @fn for a lookup on @oif with the
 * RT6_LOOKUP_F_* bits in @strict.
 *
 * The search starts at the node's round-robin pointer (or at its leaf when
 * none is set).  When the search asks for round-robin, fn->rr_ptr is
 * advanced to the next entry of the same metric, wrapping to the leaf,
 * under the table lock.
 *
 * Returns the selected route, or the namespace's null entry when the node
 * has no usable leaf, is an intermediate node, or nothing matched.
 */
741 static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
742                                    int oif, int strict)
743 {
744         struct fib6_info *leaf = rcu_dereference(fn->leaf);
745         struct fib6_info *match, *rt0;
746         bool do_rr = false;
747         int key_plen;
748
749         if (!leaf || leaf == net->ipv6.fib6_null_entry)
750                 return net->ipv6.fib6_null_entry;
751
752         rt0 = rcu_dereference(fn->rr_ptr);
753         if (!rt0)
754                 rt0 = leaf;
755
756         /* Double check to make sure fn is not an intermediate node
757          * and fn->leaf does not point to its child's leaf
758          * (This might happen if all routes under fn are deleted from
759          * the tree and fib6_repair_tree() is called on the node.)
760          */
761         key_plen = rt0->fib6_dst.plen;
762 #ifdef CONFIG_IPV6_SUBTREES
763         if (rt0->fib6_src.plen)
764                 key_plen = rt0->fib6_src.plen;
765 #endif
766         if (fn->fn_bit != key_plen)
767                 return net->ipv6.fib6_null_entry;
768
769         match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
770                              &do_rr);
771
772         if (do_rr) {
773                 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
774
775                 /* no entries matched; do round-robin */
776                 if (!next || next->fib6_metric != rt0->fib6_metric)
777                         next = leaf;
778
779                 if (next != rt0) {
780                         spin_lock_bh(&leaf->fib6_table->tb6_lock);
781                         /* make sure next is not being deleted from the tree */
782                         if (next->fib6_node)
783                                 rcu_assign_pointer(fn->rr_ptr, next);
784                         spin_unlock_bh(&leaf->fib6_table->tb6_lock);
785                 }
786         }
787
788         return match ? match : net->ipv6.fib6_null_entry;
789 }
790
791 static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
792 {
793         return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
794 }
795
796 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one route information option received from router @gwaddr on
 * @dev.
 *
 * @opt/@len: the raw option and its length in bytes.
 *
 * After validating the option, the matching route (the default router
 * entry for a zero-length prefix, a route-info route otherwise) is
 * deleted when the advertised lifetime is zero, added when it does not
 * exist yet, or has its preference refreshed.  The route's expiry is then
 * set from the lifetime, or cleared for an infinite lifetime.
 *
 * Returns 0 on success and -EINVAL for a malformed option or an invalid
 * preference value.
 */
797 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
798                   const struct in6_addr *gwaddr)
799 {
800         struct net *net = dev_net(dev);
801         struct route_info *rinfo = (struct route_info *) opt;
802         struct in6_addr prefix_buf, *prefix;
803         unsigned int pref;
804         unsigned long lifetime;
805         struct fib6_info *rt;
806
807         if (len < sizeof(struct route_info)) {
808                 return -EINVAL;
809         }
810
811         /* Sanity check for prefix_len and length */
        /* A longer prefix needs a larger option: >64 bits needs length >= 2, >0 bits length >= 1. */
812         if (rinfo->length > 3) {
813                 return -EINVAL;
814         } else if (rinfo->prefix_len > 128) {
815                 return -EINVAL;
816         } else if (rinfo->prefix_len > 64) {
817                 if (rinfo->length < 2) {
818                         return -EINVAL;
819                 }
820         } else if (rinfo->prefix_len > 0) {
821                 if (rinfo->length < 1) {
822                         return -EINVAL;
823                 }
824         }
825
826         pref = rinfo->route_pref;
827         if (pref == ICMPV6_ROUTER_PREF_INVALID)
828                 return -EINVAL;
829
830         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
831
        /* Only a length-3 option is used in place; shorter ones are copied and masked. */
832         if (rinfo->length == 3)
833                 prefix = (struct in6_addr *)rinfo->prefix;
834         else {
835                 /* this function is safe */
836                 ipv6_addr_prefix(&prefix_buf,
837                                  (struct in6_addr *)rinfo->prefix,
838                                  rinfo->prefix_len);
839                 prefix = &prefix_buf;
840         }
841
842         if (rinfo->prefix_len == 0)
843                 rt = rt6_get_dflt_router(net, gwaddr, dev);
844         else
845                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
846                                         gwaddr, dev);
847
        /* A zero lifetime withdraws an existing route. */
848         if (rt && !lifetime) {
849                 ip6_del_rt(net, rt);
850                 rt = NULL;
851         }
852
853         if (!rt && lifetime)
854                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
855                                         dev, pref);
856         else if (rt)
857                 rt->fib6_flags = RTF_ROUTEINFO |
858                                  (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
859
860         if (rt) {
861                 if (!addrconf_finite_timeout(lifetime))
862                         fib6_clean_expires(rt);
863                 else
864                         fib6_set_expires(rt, jiffies + HZ * lifetime);
865
866                 fib6_info_release(rt);
867         }
868         return 0;
869 }
870 #endif
871
872 /*
873  *      Misc support functions
874  */
875
876 /* called with rcu_lock held */
877 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
878 {
879         struct net_device *dev = rt->fib6_nh.nh_dev;
880
881         if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
882                 /* for copies of local routes, dst->dev needs to be the
883                  * device if it is a master device, the master device if
884                  * device is enslaved, and the loopback as the default
885                  */
886                 if (netif_is_l3_slave(dev) &&
887                     !rt6_need_strict(&rt->fib6_dst.addr))
888                         dev = l3mdev_master_dev_rcu(dev);
889                 else if (!netif_is_l3_master(dev))
890                         dev = dev_net(dev)->loopback_dev;
891                 /* last case is netif_is_l3_master(dev) is true in which
892                  * case we want dev returned to be dev
893                  */
894         }
895
896         return dev;
897 }
898
899 static const int fib6_prop[RTN_MAX + 1] = {
900         [RTN_UNSPEC]    = 0,
901         [RTN_UNICAST]   = 0,
902         [RTN_LOCAL]     = 0,
903         [RTN_BROADCAST] = 0,
904         [RTN_ANYCAST]   = 0,
905         [RTN_MULTICAST] = 0,
906         [RTN_BLACKHOLE] = -EINVAL,
907         [RTN_UNREACHABLE] = -EHOSTUNREACH,
908         [RTN_PROHIBIT]  = -EACCES,
909         [RTN_THROW]     = -EAGAIN,
910         [RTN_NAT]       = -EINVAL,
911         [RTN_XRESOLVE]  = -EINVAL,
912 };
913
914 static int ip6_rt_type_to_error(u8 fib6_type)
915 {
916         return fib6_prop[fib6_type];
917 }
918
919 static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
920 {
921         unsigned short flags = 0;
922
923         if (rt->dst_nocount)
924                 flags |= DST_NOCOUNT;
925         if (rt->dst_nopolicy)
926                 flags |= DST_NOPOLICY;
927         if (rt->dst_host)
928                 flags |= DST_HOST;
929
930         return flags;
931 }
932
933 static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
934 {
935         rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
936
937         switch (ort->fib6_type) {
938         case RTN_BLACKHOLE:
939                 rt->dst.output = dst_discard_out;
940                 rt->dst.input = dst_discard;
941                 break;
942         case RTN_PROHIBIT:
943                 rt->dst.output = ip6_pkt_prohibit_out;
944                 rt->dst.input = ip6_pkt_prohibit;
945                 break;
946         case RTN_THROW:
947         case RTN_UNREACHABLE:
948         default:
949                 rt->dst.output = ip6_pkt_discard_out;
950                 rt->dst.input = ip6_pkt_discard;
951                 break;
952         }
953 }
954
955 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
956 {
957         if (ort->fib6_flags & RTF_REJECT) {
958                 ip6_rt_init_dst_reject(rt, ort);
959                 return;
960         }
961
962         rt->dst.error = 0;
963         rt->dst.output = ip6_output;
964
965         if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
966                 rt->dst.input = ip6_input;
967         } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
968                 rt->dst.input = ip6_mc_input;
969         } else {
970                 rt->dst.input = ip6_forward;
971         }
972
973         if (ort->fib6_nh.nh_lwtstate) {
974                 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
975                 lwtunnel_set_redirect(&rt->dst);
976         }
977
978         rt->dst.lastuse = jiffies;
979 }
980
/* Caller must already hold reference to @from
 *
 * Attach @rt to the fib6_info it was derived from: clear RTF_EXPIRES on
 * @rt, publish @from through rt->from and point the dst metrics at the
 * metrics of @from.
 */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
	rt->rt6i_flags &= ~RTF_EXPIRES;
	rcu_assign_pointer(rt->from, from);
	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
	/* Metrics other than the shared default set are refcounted: flag the
	 * dst accordingly and take a reference on them.
	 */
	if (from->fib6_metrics != &dst_default_metrics) {
		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
		refcount_inc(&from->fib6_metrics->refcnt);
	}
}
992
993 /* Caller must already hold reference to @ort */
994 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
995 {
996         struct net_device *dev = fib6_info_nh_dev(ort);
997
998         ip6_rt_init_dst(rt, ort);
999
1000         rt->rt6i_dst = ort->fib6_dst;
1001         rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
1002         rt->rt6i_gateway = ort->fib6_nh.nh_gw;
1003         rt->rt6i_flags = ort->fib6_flags;
1004         rt6_set_from(rt, ort);
1005 #ifdef CONFIG_IPV6_SUBTREES
1006         rt->rt6i_src = ort->fib6_src;
1007 #endif
1008         rt->rt6i_prefsrc = ort->fib6_prefsrc;
1009 }
1010
1011 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1012                                         struct in6_addr *saddr)
1013 {
1014         struct fib6_node *pn, *sn;
1015         while (1) {
1016                 if (fn->fn_flags & RTN_TL_ROOT)
1017                         return NULL;
1018                 pn = rcu_dereference(fn->parent);
1019                 sn = FIB6_SUBTREE(pn);
1020                 if (sn && sn != fn)
1021                         fn = fib6_node_lookup(sn, NULL, saddr);
1022                 else
1023                         fn = pn;
1024                 if (fn->fn_flags & RTN_RTINFO)
1025                         return fn;
1026         }
1027 }
1028
1029 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1030                           bool null_fallback)
1031 {
1032         struct rt6_info *rt = *prt;
1033
1034         if (dst_hold_safe(&rt->dst))
1035                 return true;
1036         if (null_fallback) {
1037                 rt = net->ipv6.ip6_null_entry;
1038                 dst_hold(&rt->dst);
1039         } else {
1040                 rt = NULL;
1041         }
1042         *prt = rt;
1043         return false;
1044 }
1045
1046 /* called with rcu_lock held */
1047 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1048 {
1049         unsigned short flags = fib6_info_dst_flags(rt);
1050         struct net_device *dev = rt->fib6_nh.nh_dev;
1051         struct rt6_info *nrt;
1052
1053         if (!fib6_info_hold_safe(rt))
1054                 goto fallback;
1055
1056         nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1057         if (!nrt) {
1058                 fib6_info_release(rt);
1059                 goto fallback;
1060         }
1061
1062         ip6_rt_copy_init(nrt, rt);
1063         return nrt;
1064
1065 fallback:
1066         nrt = dev_net(dev)->ipv6.ip6_null_entry;
1067         dst_hold(&nrt->dst);
1068         return nrt;
1069 }
1070
/* Look up the route for @fl6 in @table; passed as the per-table lookup
 * callback to fib6_rule_lookup().
 *
 * The whole lookup runs inside rcu_read_lock()/rcu_read_unlock().  The
 * result is, in order of preference: a matching entry of the exception
 * table, ip6_null_entry when nothing matched, or whatever
 * ip6_create_rt_rcu() builds from the selected fib6_info.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     const struct sk_buff *skb,
					     int flags)
{
	struct fib6_info *f6i;
	struct fib6_node *fn;
	struct rt6_info *rt;

	/* the flow asked for the interface restriction to be dropped */
	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		flags &= ~RT6_LOOKUP_F_IFACE;

	rcu_read_lock();
	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	f6i = rcu_dereference(fn->leaf);
	if (!f6i) {
		f6i = net->ipv6.fib6_null_entry;
	} else {
		f6i = rt6_device_match(net, f6i, &fl6->saddr,
				      fl6->flowi6_oif, flags);
		/* multipath selection only when no output interface is set */
		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
			f6i = fib6_multipath_select(net, f6i, fl6,
						    fl6->flowi6_oif, skb,
						    flags);
	}
	/* nothing usable at this node: backtrack and retry from there */
	if (f6i == net->ipv6.fib6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

	trace_fib6_table_lookup(net, f6i, table, fl6);

	/* Search through exception table */
	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
	if (rt) {
		/* on failure ip6_hold_safe() swaps in the held null entry */
		if (ip6_hold_safe(net, &rt, true))
			dst_use_noref(&rt->dst, jiffies);
	} else if (f6i == net->ipv6.fib6_null_entry) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = ip6_create_rt_rcu(f6i);
	}

	rcu_read_unlock();

	return rt;
}
1122
/* Exported route lookup: run fib6_rule_lookup() for @fl6 with
 * ip6_pol_route_lookup() as the per-table lookup function.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   const struct sk_buff *skb, int flags)
{
	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
1129
1130 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1131                             const struct in6_addr *saddr, int oif,
1132                             const struct sk_buff *skb, int strict)
1133 {
1134         struct flowi6 fl6 = {
1135                 .flowi6_oif = oif,
1136                 .daddr = *daddr,
1137         };
1138         struct dst_entry *dst;
1139         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1140
1141         if (saddr) {
1142                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1143                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1144         }
1145
1146         dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1147         if (dst->error == 0)
1148                 return (struct rt6_info *) dst;
1149
1150         dst_release(dst);
1151
1152         return NULL;
1153 }
1154 EXPORT_SYMBOL(rt6_lookup);
1155
1156 /* ip6_ins_rt is called with FREE table->tb6_lock.
1157  * It takes new route entry, the addition fails by any reason the
1158  * route is released.
1159  * Caller must hold dst before calling it.
1160  */
1161
1162 static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1163                         struct netlink_ext_ack *extack)
1164 {
1165         int err;
1166         struct fib6_table *table;
1167
1168         table = rt->fib6_table;
1169         spin_lock_bh(&table->tb6_lock);
1170         err = fib6_add(&table->tb6_root, rt, info, extack);
1171         spin_unlock_bh(&table->tb6_lock);
1172
1173         return err;
1174 }
1175
/* Insert @rt into its table on behalf of @net: calls __ip6_ins_rt() with an
 * nl_info that only carries the namespace and with no extack.
 * Returns the result of __ip6_ins_rt().
 */
int ip6_ins_rt(struct net *net, struct fib6_info *rt)
{
	struct nl_info info = { .nl_net = net, };

	return __ip6_ins_rt(rt, &info, NULL);
}
1182
1183 static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
1184                                            const struct in6_addr *daddr,
1185                                            const struct in6_addr *saddr)
1186 {
1187         struct net_device *dev;
1188         struct rt6_info *rt;
1189
1190         /*
1191          *      Clone the route.
1192          */
1193
1194         if (!fib6_info_hold_safe(ort))
1195                 return NULL;
1196
1197         dev = ip6_rt_get_dev_rcu(ort);
1198         rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1199         if (!rt) {
1200                 fib6_info_release(ort);
1201                 return NULL;
1202         }
1203
1204         ip6_rt_copy_init(rt, ort);
1205         rt->rt6i_flags |= RTF_CACHE;
1206         rt->dst.flags |= DST_HOST;
1207         rt->rt6i_dst.addr = *daddr;
1208         rt->rt6i_dst.plen = 128;
1209
1210         if (!rt6_is_gw_or_nonexthop(ort)) {
1211                 if (ort->fib6_dst.plen != 128 &&
1212                     ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
1213                         rt->rt6i_flags |= RTF_ANYCAST;
1214 #ifdef CONFIG_IPV6_SUBTREES
1215                 if (rt->rt6i_src.plen && saddr) {
1216                         rt->rt6i_src.addr = *saddr;
1217                         rt->rt6i_src.plen = 128;
1218                 }
1219 #endif
1220         }
1221
1222         return rt;
1223 }
1224
1225 static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1226 {
1227         unsigned short flags = fib6_info_dst_flags(rt);
1228         struct net_device *dev;
1229         struct rt6_info *pcpu_rt;
1230
1231         if (!fib6_info_hold_safe(rt))
1232                 return NULL;
1233
1234         rcu_read_lock();
1235         dev = ip6_rt_get_dev_rcu(rt);
1236         pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
1237         rcu_read_unlock();
1238         if (!pcpu_rt) {
1239                 fib6_info_release(rt);
1240                 return NULL;
1241         }
1242         ip6_rt_copy_init(pcpu_rt, rt);
1243         pcpu_rt->rt6i_flags |= RTF_PCPU;
1244         return pcpu_rt;
1245 }
1246
1247 /* It should be called with rcu_read_lock() acquired */
1248 static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1249 {
1250         struct rt6_info *pcpu_rt, **p;
1251
1252         p = this_cpu_ptr(rt->rt6i_pcpu);
1253         pcpu_rt = *p;
1254
1255         if (pcpu_rt)
1256                 ip6_hold_safe(NULL, &pcpu_rt, false);
1257
1258         return pcpu_rt;
1259 }
1260
/* Allocate a per-cpu copy of @rt and install it in this CPU's slot.
 *
 * Returns the new copy, or the held ip6_null_entry of @net when the
 * allocation fails.
 */
static struct rt6_info *rt6_make_pcpu_route(struct net *net,
					    struct fib6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	/* extra hold taken before the copy is stored in the per-cpu slot */
	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	/* the slot is expected to be empty; anything else is a bug */
	prev = cmpxchg(p, NULL, pcpu_rt);
	BUG_ON(prev);

	/* @rt is being destroyed: detach the copy from it and drop the
	 * fib6_info reference that pcpu_rt->from was holding.
	 */
	if (rt->fib6_destroying) {
		struct fib6_info *from;

		from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
		fib6_info_release(from);
	}

	return pcpu_rt;
}
1286
/* exception hash table implementation
 *
 * rt6_exception_lock is the single spinlock that the functions below take
 * (or require the caller to hold) when modifying exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1290
/* Remove rt6_ex from hash table and free the memory
 * Caller must hold rt6_exception_lock
 *
 * Does nothing when either @bucket or @rt6_ex is NULL.
 */
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
				 struct rt6_exception *rt6_ex)
{
	struct fib6_info *from;
	struct net *net;

	if (!bucket || !rt6_ex)
		return;

	/* keep the per-namespace count of cached routes in sync */
	net = dev_net(rt6_ex->rt6i->dst.dev);
	net->ipv6.rt6_stats->fib_rt_cache--;

	/* purge completely the exception to allow releasing the held resources:
	 * some [sk] cache may keep the dst around for unlimited time
	 */
	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
	fib6_info_release(from);
	dst_dev_put(&rt6_ex->rt6i->dst);

	/* unlink, drop the dst reference, then free the entry via RCU */
	hlist_del_rcu(&rt6_ex->hlist);
	dst_release(&rt6_ex->rt6i->dst);
	kfree_rcu(rt6_ex, rcu);
	/* depth is about to be decremented, so it must not already be 0 */
	WARN_ON_ONCE(!bucket->depth);
	bucket->depth--;
}
1319
1320 /* Remove oldest rt6_ex in bucket and free the memory
1321  * Caller must hold rt6_exception_lock
1322  */
1323 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1324 {
1325         struct rt6_exception *rt6_ex, *oldest = NULL;
1326
1327         if (!bucket)
1328                 return;
1329
1330         hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1331                 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1332                         oldest = rt6_ex;
1333         }
1334         rt6_remove_exception(bucket, oldest);
1335 }
1336
/* Compute the exception bucket index for a (@dst, @src) address pair.
 *
 * @dst is always hashed.  @src is mixed in only when CONFIG_IPV6_SUBTREES
 * is enabled and @src is non-NULL.  The result is reduced to
 * FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits with hash_32().
 */
static u32 rt6_exception_hash(const struct in6_addr *dst,
			      const struct in6_addr *src)
{
	/* hash seed, filled in through net_get_random_once() */
	static u32 seed __read_mostly;
	u32 val;

	net_get_random_once(&seed, sizeof(seed));
	val = jhash(dst, sizeof(*dst), seed);

#ifdef CONFIG_IPV6_SUBTREES
	/* chain the source address onto the destination hash */
	if (src)
		val = jhash(src, sizeof(*src), val);
#endif
	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
1352
1353 /* Helper function to find the cached rt in the hash table
1354  * and update bucket pointer to point to the bucket for this
1355  * (daddr, saddr) pair
1356  * Caller must hold rt6_exception_lock
1357  */
1358 static struct rt6_exception *
1359 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1360                               const struct in6_addr *daddr,
1361                               const struct in6_addr *saddr)
1362 {
1363         struct rt6_exception *rt6_ex;
1364         u32 hval;
1365
1366         if (!(*bucket) || !daddr)
1367                 return NULL;
1368
1369         hval = rt6_exception_hash(daddr, saddr);
1370         *bucket += hval;
1371
1372         hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1373                 struct rt6_info *rt6 = rt6_ex->rt6i;
1374                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1375
1376 #ifdef CONFIG_IPV6_SUBTREES
1377                 if (matched && saddr)
1378                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1379 #endif
1380                 if (matched)
1381                         return rt6_ex;
1382         }
1383         return NULL;
1384 }
1385
1386 /* Helper function to find the cached rt in the hash table
1387  * and update bucket pointer to point to the bucket for this
1388  * (daddr, saddr) pair
1389  * Caller must hold rcu_read_lock()
1390  */
1391 static struct rt6_exception *
1392 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1393                          const struct in6_addr *daddr,
1394                          const struct in6_addr *saddr)
1395 {
1396         struct rt6_exception *rt6_ex;
1397         u32 hval;
1398
1399         WARN_ON_ONCE(!rcu_read_lock_held());
1400
1401         if (!(*bucket) || !daddr)
1402                 return NULL;
1403
1404         hval = rt6_exception_hash(daddr, saddr);
1405         *bucket += hval;
1406
1407         hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1408                 struct rt6_info *rt6 = rt6_ex->rt6i;
1409                 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1410
1411 #ifdef CONFIG_IPV6_SUBTREES
1412                 if (matched && saddr)
1413                         matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1414 #endif
1415                 if (matched)
1416                         return rt6_ex;
1417         }
1418         return NULL;
1419 }
1420
1421 static unsigned int fib6_mtu(const struct fib6_info *rt)
1422 {
1423         unsigned int mtu;
1424
1425         if (rt->fib6_pmtu) {
1426                 mtu = rt->fib6_pmtu;
1427         } else {
1428                 struct net_device *dev = fib6_info_nh_dev(rt);
1429                 struct inet6_dev *idev;
1430
1431                 rcu_read_lock();
1432                 idev = __in6_dev_get(dev);
1433                 mtu = idev->cnf.mtu6;
1434                 rcu_read_unlock();
1435         }
1436
1437         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1438
1439         return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1440 }
1441
/* Insert the cached route @nrt into the exception table of @ort.
 *
 * The bucket array is allocated on first use.  An existing exception for
 * the same key is replaced, and the oldest entry of the bucket is evicted
 * once its depth exceeds FIB6_MAX_DEPTH.
 *
 * Returns 0 on success, -EINVAL when the table of @ort has been flushed or
 * the MTU of @nrt is not below that of @ort, -ENOMEM on allocation failure.
 */
static int rt6_insert_exception(struct rt6_info *nrt,
				struct fib6_info *ort)
{
	struct net *net = dev_net(nrt->dst.dev);
	struct rt6_exception_bucket *bucket;
	struct in6_addr *src_key = NULL;
	struct rt6_exception *rt6_ex;
	int err = 0;

	spin_lock_bh(&rt6_exception_lock);

	/* set by rt6_flush_exceptions(): the bucket list must not be
	 * recreated after a flush
	 */
	if (ort->exception_bucket_flushed) {
		err = -EINVAL;
		goto out;
	}

	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
					lockdep_is_held(&rt6_exception_lock));
	if (!bucket) {
		/* GFP_ATOMIC: allocated with the spinlock held */
		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
				 GFP_ATOMIC);
		if (!bucket) {
			err = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
	}

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates ort is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * Otherwise, the exception table is indexed by
	 * a hash of only rt6i_dst.
	 */
	if (ort->fib6_src.plen)
		src_key = &nrt->rt6i_src.addr;
#endif

	/* Update rt6i_prefsrc as it could be changed
	 * in rt6_remove_prefsrc()
	 */
	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
	/* rt6_mtu_change() might lower mtu on ort.
	 * Only insert this exception route if its mtu
	 * is less than ort's mtu value.
	 */
	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
		err = -EINVAL;
		goto out;
	}

	/* the lookup moves 'bucket' to the hash bucket of this key; an
	 * entry already stored for the key is dropped first
	 */
	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
					       src_key);
	if (rt6_ex)
		rt6_remove_exception(bucket, rt6_ex);

	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
	if (!rt6_ex) {
		err = -ENOMEM;
		goto out;
	}
	rt6_ex->rt6i = nrt;
	rt6_ex->stamp = jiffies;
	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
	bucket->depth++;
	net->ipv6.rt6_stats->fib_rt_cache++;

	if (bucket->depth > FIB6_MAX_DEPTH)
		rt6_exception_remove_oldest(bucket);

out:
	spin_unlock_bh(&rt6_exception_lock);

	/* Update fn->fn_sernum to invalidate all cached dst */
	if (!err) {
		spin_lock_bh(&ort->fib6_table->tb6_lock);
		fib6_update_sernum(net, ort);
		spin_unlock_bh(&ort->fib6_table->tb6_lock);
		fib6_force_start_gc(net);
	}

	return err;
}
1526
1527 void rt6_flush_exceptions(struct fib6_info *rt)
1528 {
1529         struct rt6_exception_bucket *bucket;
1530         struct rt6_exception *rt6_ex;
1531         struct hlist_node *tmp;
1532         int i;
1533
1534         spin_lock_bh(&rt6_exception_lock);
1535         /* Prevent rt6_insert_exception() to recreate the bucket list */
1536         rt->exception_bucket_flushed = 1;
1537
1538         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1539                                     lockdep_is_held(&rt6_exception_lock));
1540         if (!bucket)
1541                 goto out;
1542
1543         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1544                 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1545                         rt6_remove_exception(bucket, rt6_ex);
1546                 WARN_ON_ONCE(bucket->depth);
1547                 bucket++;
1548         }
1549
1550 out:
1551         spin_unlock_bh(&rt6_exception_lock);
1552 }
1553
/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock()
 *
 * Returns the cached rt6_info for (@daddr, @saddr), or NULL when there is
 * no entry or the entry found has expired.  No reference is taken here.
 */
static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	const struct in6_addr *src_key = NULL;
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct rt6_info *res = NULL;

#ifdef CONFIG_IPV6_SUBTREES
	/* rt6i_src.plen != 0 indicates rt is in subtree
	 * and exception table is indexed by a hash of
	 * both rt6i_dst and rt6i_src.
	 * However, the src addr used to create the hash
	 * might not be exactly the passed in saddr which
	 * is a /128 addr from the flow.
	 * So we need to use f6i->fib6_src to redo lookup
	 * if the passed in saddr does not find anything.
	 * (See the logic in ip6_rt_cache_alloc() on how
	 * rt->rt6i_src is updated.)
	 */
	if (rt->fib6_src.plen)
		src_key = saddr;
find_ex:
#endif
	/* re-read on every pass: the lookup below advances 'bucket' */
	bucket = rcu_dereference(rt->rt6i_exception_bucket);
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);

	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		res = rt6_ex->rt6i;

#ifdef CONFIG_IPV6_SUBTREES
	/* Use fib6_src as src_key and redo lookup */
	if (!res && src_key && src_key != &rt->fib6_src.addr) {
		src_key = &rt->fib6_src.addr;
		goto find_ex;
	}
#endif

	return res;
}
1598
1599 /* Remove the passed in cached rt from the hash table that contains it */
1600 static int rt6_remove_exception_rt(struct rt6_info *rt)
1601 {
1602         struct rt6_exception_bucket *bucket;
1603         struct in6_addr *src_key = NULL;
1604         struct rt6_exception *rt6_ex;
1605         struct fib6_info *from;
1606         int err;
1607
1608         from = rcu_dereference(rt->from);
1609         if (!from ||
1610             !(rt->rt6i_flags & RTF_CACHE))
1611                 return -EINVAL;
1612
1613         if (!rcu_access_pointer(from->rt6i_exception_bucket))
1614                 return -ENOENT;
1615
1616         spin_lock_bh(&rt6_exception_lock);
1617         bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1618                                     lockdep_is_held(&rt6_exception_lock));
1619 #ifdef CONFIG_IPV6_SUBTREES
1620         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1621          * and exception table is indexed by a hash of
1622          * both rt6i_dst and rt6i_src.
1623          * Otherwise, the exception table is indexed by
1624          * a hash of only rt6i_dst.
1625          */
1626         if (from->fib6_src.plen)
1627                 src_key = &rt->rt6i_src.addr;
1628 #endif
1629         rt6_ex = __rt6_find_exception_spinlock(&bucket,
1630                                                &rt->rt6i_dst.addr,
1631                                                src_key);
1632         if (rt6_ex) {
1633                 rt6_remove_exception(bucket, rt6_ex);
1634                 err = 0;
1635         } else {
1636                 err = -ENOENT;
1637         }
1638
1639         spin_unlock_bh(&rt6_exception_lock);
1640         return err;
1641 }
1642
1643 /* Find rt6_ex which contains the passed in rt cache and
1644  * refresh its stamp
1645  */
1646 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1647 {
1648         struct rt6_exception_bucket *bucket;
1649         struct in6_addr *src_key = NULL;
1650         struct rt6_exception *rt6_ex;
1651         struct fib6_info *from;
1652
1653         rcu_read_lock();
1654         from = rcu_dereference(rt->from);
1655         if (!from || !(rt->rt6i_flags & RTF_CACHE))
1656                 goto unlock;
1657
1658         bucket = rcu_dereference(from->rt6i_exception_bucket);
1659
1660 #ifdef CONFIG_IPV6_SUBTREES
1661         /* rt6i_src.plen != 0 indicates 'from' is in subtree
1662          * and exception table is indexed by a hash of
1663          * both rt6i_dst and rt6i_src.
1664          * Otherwise, the exception table is indexed by
1665          * a hash of only rt6i_dst.
1666          */
1667         if (from->fib6_src.plen)
1668                 src_key = &rt->rt6i_src.addr;
1669 #endif
1670         rt6_ex = __rt6_find_exception_rcu(&bucket,
1671                                           &rt->rt6i_dst.addr,
1672                                           src_key);
1673         if (rt6_ex)
1674                 rt6_ex->stamp = jiffies;
1675
1676 unlock:
1677         rcu_read_unlock();
1678 }
1679
1680 static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
1681 {
1682         struct rt6_exception_bucket *bucket;
1683         struct rt6_exception *rt6_ex;
1684         int i;
1685
1686         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1687                                         lockdep_is_held(&rt6_exception_lock));
1688
1689         if (bucket) {
1690                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1691                         hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1692                                 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1693                         }
1694                         bucket++;
1695                 }
1696         }
1697 }
1698
1699 static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1700                                          struct rt6_info *rt, int mtu)
1701 {
1702         /* If the new MTU is lower than the route PMTU, this new MTU will be the
1703          * lowest MTU in the path: always allow updating the route PMTU to
1704          * reflect PMTU decreases.
1705          *
1706          * If the new MTU is higher, and the route PMTU is equal to the local
1707          * MTU, this means the old MTU is the lowest in the path, so allow
1708          * updating it: if other nodes now have lower MTUs, PMTU discovery will
1709          * handle this.
1710          */
1711
1712         if (dst_mtu(&rt->dst) >= mtu)
1713                 return true;
1714
1715         if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1716                 return true;
1717
1718         return false;
1719 }
1720
1721 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1722                                        struct fib6_info *rt, int mtu)
1723 {
1724         struct rt6_exception_bucket *bucket;
1725         struct rt6_exception *rt6_ex;
1726         int i;
1727
1728         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1729                                         lockdep_is_held(&rt6_exception_lock));
1730
1731         if (!bucket)
1732                 return;
1733
1734         for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1735                 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1736                         struct rt6_info *entry = rt6_ex->rt6i;
1737
1738                         /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1739                          * route), the metrics of its rt->from have already
1740                          * been updated.
1741                          */
1742                         if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1743                             rt6_mtu_change_route_allowed(idev, entry, mtu))
1744                                 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1745                 }
1746                 bucket++;
1747         }
1748 }
1749
1750 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
1751
1752 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1753                                         struct in6_addr *gateway)
1754 {
1755         struct rt6_exception_bucket *bucket;
1756         struct rt6_exception *rt6_ex;
1757         struct hlist_node *tmp;
1758         int i;
1759
1760         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1761                 return;
1762
1763         spin_lock_bh(&rt6_exception_lock);
1764         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1765                                      lockdep_is_held(&rt6_exception_lock));
1766
1767         if (bucket) {
1768                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1769                         hlist_for_each_entry_safe(rt6_ex, tmp,
1770                                                   &bucket->chain, hlist) {
1771                                 struct rt6_info *entry = rt6_ex->rt6i;
1772
1773                                 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1774                                     RTF_CACHE_GATEWAY &&
1775                                     ipv6_addr_equal(gateway,
1776                                                     &entry->rt6i_gateway)) {
1777                                         rt6_remove_exception(bucket, rt6_ex);
1778                                 }
1779                         }
1780                         bucket++;
1781                 }
1782         }
1783
1784         spin_unlock_bh(&rt6_exception_lock);
1785 }
1786
1787 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1788                                       struct rt6_exception *rt6_ex,
1789                                       struct fib6_gc_args *gc_args,
1790                                       unsigned long now)
1791 {
1792         struct rt6_info *rt = rt6_ex->rt6i;
1793
1794         /* we are pruning and obsoleting aged-out and non gateway exceptions
1795          * even if others have still references to them, so that on next
1796          * dst_check() such references can be dropped.
1797          * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1798          * expired, independently from their aging, as per RFC 8201 section 4
1799          */
1800         if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1801                 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1802                         RT6_TRACE("aging clone %p\n", rt);
1803                         rt6_remove_exception(bucket, rt6_ex);
1804                         return;
1805                 }
1806         } else if (time_after(jiffies, rt->dst.expires)) {
1807                 RT6_TRACE("purging expired route %p\n", rt);
1808                 rt6_remove_exception(bucket, rt6_ex);
1809                 return;
1810         }
1811
1812         if (rt->rt6i_flags & RTF_GATEWAY) {
1813                 struct neighbour *neigh;
1814                 __u8 neigh_flags = 0;
1815
1816                 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1817                 if (neigh)
1818                         neigh_flags = neigh->flags;
1819
1820                 if (!(neigh_flags & NTF_ROUTER)) {
1821                         RT6_TRACE("purging route %p via non-router but gateway\n",
1822                                   rt);
1823                         rt6_remove_exception(bucket, rt6_ex);
1824                         return;
1825                 }
1826         }
1827
1828         gc_args->more++;
1829 }
1830
1831 void rt6_age_exceptions(struct fib6_info *rt,
1832                         struct fib6_gc_args *gc_args,
1833                         unsigned long now)
1834 {
1835         struct rt6_exception_bucket *bucket;
1836         struct rt6_exception *rt6_ex;
1837         struct hlist_node *tmp;
1838         int i;
1839
1840         if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1841                 return;
1842
1843         rcu_read_lock_bh();
1844         spin_lock(&rt6_exception_lock);
1845         bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1846                                     lockdep_is_held(&rt6_exception_lock));
1847
1848         if (bucket) {
1849                 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1850                         hlist_for_each_entry_safe(rt6_ex, tmp,
1851                                                   &bucket->chain, hlist) {
1852                                 rt6_age_examine_exception(bucket, rt6_ex,
1853                                                           gc_args, now);
1854                         }
1855                         bucket++;
1856                 }
1857         }
1858         spin_unlock(&rt6_exception_lock);
1859         rcu_read_unlock_bh();
1860 }
1861
1862 /* must be called with rcu lock held */
1863 struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1864                                     int oif, struct flowi6 *fl6, int strict)
1865 {
1866         struct fib6_node *fn, *saved_fn;
1867         struct fib6_info *f6i;
1868
1869         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1870         saved_fn = fn;
1871
1872         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1873                 oif = 0;
1874
1875 redo_rt6_select:
1876         f6i = rt6_select(net, fn, oif, strict);
1877         if (f6i == net->ipv6.fib6_null_entry) {
1878                 fn = fib6_backtrack(fn, &fl6->saddr);
1879                 if (fn)
1880                         goto redo_rt6_select;
1881                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1882                         /* also consider unreachable route */
1883                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1884                         fn = saved_fn;
1885                         goto redo_rt6_select;
1886                 }
1887         }
1888
1889         trace_fib6_table_lookup(net, f6i, table, fl6);
1890
1891         return f6i;
1892 }
1893
1894 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1895                                int oif, struct flowi6 *fl6,
1896                                const struct sk_buff *skb, int flags)
1897 {
1898         struct fib6_info *f6i;
1899         struct rt6_info *rt;
1900         int strict = 0;
1901
1902         strict |= flags & RT6_LOOKUP_F_IFACE;
1903         strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1904         if (net->ipv6.devconf_all->forwarding == 0)
1905                 strict |= RT6_LOOKUP_F_REACHABLE;
1906
1907         rcu_read_lock();
1908
1909         f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1910         if (f6i->fib6_nsiblings)
1911                 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1912
1913         if (f6i == net->ipv6.fib6_null_entry) {
1914                 rt = net->ipv6.ip6_null_entry;
1915                 rcu_read_unlock();
1916                 dst_hold(&rt->dst);
1917                 return rt;
1918         }
1919
1920         /*Search through exception table */
1921         rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1922         if (rt) {
1923                 if (ip6_hold_safe(net, &rt, true))
1924                         dst_use_noref(&rt->dst, jiffies);
1925
1926                 rcu_read_unlock();
1927                 return rt;
1928         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1929                             !(f6i->fib6_flags & RTF_GATEWAY))) {
1930                 /* Create a RTF_CACHE clone which will not be
1931                  * owned by the fib6 tree.  It is for the special case where
1932                  * the daddr in the skb during the neighbor look-up is different
1933                  * from the fl6->daddr used to look-up route here.
1934                  */
1935                 struct rt6_info *uncached_rt;
1936
1937                 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1938
1939                 rcu_read_unlock();
1940
1941                 if (uncached_rt) {
1942                         /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1943                          * No need for another dst_hold()
1944                          */
1945                         rt6_uncached_list_add(uncached_rt);
1946                         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1947                 } else {
1948                         uncached_rt = net->ipv6.ip6_null_entry;
1949                         dst_hold(&uncached_rt->dst);
1950                 }
1951
1952                 return uncached_rt;
1953         } else {
1954                 /* Get a percpu copy */
1955
1956                 struct rt6_info *pcpu_rt;
1957
1958                 local_bh_disable();
1959                 pcpu_rt = rt6_get_pcpu_route(f6i);
1960
1961                 if (!pcpu_rt)
1962                         pcpu_rt = rt6_make_pcpu_route(net, f6i);
1963
1964                 local_bh_enable();
1965                 rcu_read_unlock();
1966
1967                 return pcpu_rt;
1968         }
1969 }
1970 EXPORT_SYMBOL_GPL(ip6_pol_route);
1971
1972 static struct rt6_info *ip6_pol_route_input(struct net *net,
1973                                             struct fib6_table *table,
1974                                             struct flowi6 *fl6,
1975                                             const struct sk_buff *skb,
1976                                             int flags)
1977 {
1978         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
1979 }
1980
1981 struct dst_entry *ip6_route_input_lookup(struct net *net,
1982                                          struct net_device *dev,
1983                                          struct flowi6 *fl6,
1984                                          const struct sk_buff *skb,
1985                                          int flags)
1986 {
1987         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1988                 flags |= RT6_LOOKUP_F_IFACE;
1989
1990         return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
1991 }
1992 EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1993
1994 static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1995                                   struct flow_keys *keys,
1996                                   struct flow_keys *flkeys)
1997 {
1998         const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1999         const struct ipv6hdr *key_iph = outer_iph;
2000         struct flow_keys *_flkeys = flkeys;
2001         const struct ipv6hdr *inner_iph;
2002         const struct icmp6hdr *icmph;
2003         struct ipv6hdr _inner_iph;
2004         struct icmp6hdr _icmph;
2005
2006         if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2007                 goto out;
2008
2009         icmph = skb_header_pointer(skb, skb_transport_offset(skb),
2010                                    sizeof(_icmph), &_icmph);
2011         if (!icmph)
2012                 goto out;
2013
2014         if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2015             icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2016             icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2017             icmph->icmp6_type != ICMPV6_PARAMPROB)
2018                 goto out;
2019
2020         inner_iph = skb_header_pointer(skb,
2021                                        skb_transport_offset(skb) + sizeof(*icmph),
2022                                        sizeof(_inner_iph), &_inner_iph);
2023         if (!inner_iph)
2024                 goto out;
2025
2026         key_iph = inner_iph;
2027         _flkeys = NULL;
2028 out:
2029         if (_flkeys) {
2030                 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2031                 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2032                 keys->tags.flow_label = _flkeys->tags.flow_label;
2033                 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2034         } else {
2035                 keys->addrs.v6addrs.src = key_iph->saddr;
2036                 keys->addrs.v6addrs.dst = key_iph->daddr;
2037                 keys->tags.flow_label = ip6_flowlabel(key_iph);
2038                 keys->basic.ip_proto = key_iph->nexthdr;
2039         }
2040 }
2041
2042 /* if skb is set it will be used and fl6 can be NULL */
2043 u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2044                        const struct sk_buff *skb, struct flow_keys *flkeys)
2045 {
2046         struct flow_keys hash_keys;
2047         u32 mhash;
2048
2049         switch (ip6_multipath_hash_policy(net)) {
2050         case 0:
2051                 memset(&hash_keys, 0, sizeof(hash_keys));
2052                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2053                 if (skb) {
2054                         ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2055                 } else {
2056                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2057                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2058                         hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
2059                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2060                 }
2061                 break;
2062         case 1:
2063                 if (skb) {
2064                         unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2065                         struct flow_keys keys;
2066
2067                         /* short-circuit if we already have L4 hash present */
2068                         if (skb->l4_hash)
2069                                 return skb_get_hash_raw(skb) >> 1;
2070
2071                         memset(&hash_keys, 0, sizeof(hash_keys));
2072
2073                         if (!flkeys) {
2074                                 skb_flow_dissect_flow_keys(skb, &keys, flag);
2075                                 flkeys = &keys;
2076                         }
2077                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2078                         hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2079                         hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2080                         hash_keys.ports.src = flkeys->ports.src;
2081                         hash_keys.ports.dst = flkeys->ports.dst;
2082                         hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2083                 } else {
2084                         memset(&hash_keys, 0, sizeof(hash_keys));
2085                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2086                         hash_keys.addrs.v6addrs.src = fl6->saddr;
2087                         hash_keys.addrs.v6addrs.dst = fl6->daddr;
2088                         hash_keys.ports.src = fl6->fl6_sport;
2089                         hash_keys.ports.dst = fl6->fl6_dport;
2090                         hash_keys.basic.ip_proto = fl6->flowi6_proto;
2091                 }
2092                 break;
2093         }
2094         mhash = flow_hash_from_keys(&hash_keys);
2095
2096         return mhash >> 1;
2097 }
2098
2099 void ip6_route_input(struct sk_buff *skb)
2100 {
2101         const struct ipv6hdr *iph = ipv6_hdr(skb);
2102         struct net *net = dev_net(skb->dev);
2103         int flags = RT6_LOOKUP_F_HAS_SADDR;
2104         struct ip_tunnel_info *tun_info;
2105         struct flowi6 fl6 = {
2106                 .flowi6_iif = skb->dev->ifindex,
2107                 .daddr = iph->daddr,
2108                 .saddr = iph->saddr,
2109                 .flowlabel = ip6_flowinfo(iph),
2110                 .flowi6_mark = skb->mark,
2111                 .flowi6_proto = iph->nexthdr,
2112         };
2113         struct flow_keys *flkeys = NULL, _flkeys;
2114
2115         tun_info = skb_tunnel_info(skb);
2116         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2117                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
2118
2119         if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2120                 flkeys = &_flkeys;
2121
2122         if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2123                 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
2124         skb_dst_drop(skb);
2125         skb_dst_set(skb,
2126                     ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2127 }
2128
2129 static struct rt6_info *ip6_pol_route_output(struct net *net,
2130                                              struct fib6_table *table,
2131                                              struct flowi6 *fl6,
2132                                              const struct sk_buff *skb,
2133                                              int flags)
2134 {
2135         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2136 }
2137
2138 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2139                                          struct flowi6 *fl6, int flags)
2140 {
2141         bool any_src;
2142
2143         if (rt6_need_strict(&fl6->daddr)) {
2144                 struct dst_entry *dst;
2145
2146                 dst = l3mdev_link_scope_lookup(net, fl6);
2147                 if (dst)
2148                         return dst;
2149         }
2150
2151         fl6->flowi6_iif = LOOPBACK_IFINDEX;
2152
2153         any_src = ipv6_addr_any(&fl6->saddr);
2154         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2155             (fl6->flowi6_oif && any_src))
2156                 flags |= RT6_LOOKUP_F_IFACE;
2157
2158         if (!any_src)
2159                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2160         else if (sk)
2161                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2162
2163         return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
2164 }
2165 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
2166
2167 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
2168 {
2169         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
2170         struct net_device *loopback_dev = net->loopback_dev;
2171         struct dst_entry *new = NULL;
2172
2173         rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
2174                        DST_OBSOLETE_DEAD, 0);
2175         if (rt) {
2176                 rt6_info_init(rt);
2177                 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
2178
2179                 new = &rt->dst;
2180                 new->__use = 1;
2181                 new->input = dst_discard;
2182                 new->output = dst_discard_out;
2183
2184                 dst_copy_metrics(new, &ort->dst);
2185
2186                 rt->rt6i_idev = in6_dev_get(loopback_dev);
2187                 rt->rt6i_gateway = ort->rt6i_gateway;
2188                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
2189
2190                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2191 #ifdef CONFIG_IPV6_SUBTREES
2192                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2193 #endif
2194         }
2195
2196         dst_release(dst_orig);
2197         return new ? new : ERR_PTR(-ENOMEM);
2198 }
2199
2200 /*
2201  *      Destination cache support functions
2202  */
2203
2204 static bool fib6_check(struct fib6_info *f6i, u32 cookie)
2205 {
2206         u32 rt_cookie = 0;
2207
2208         if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
2209                 return false;
2210
2211         if (fib6_check_expired(f6i))
2212                 return false;
2213
2214         return true;
2215 }
2216
2217 static struct dst_entry *rt6_check(struct rt6_info *rt,
2218                                    struct fib6_info *from,
2219                                    u32 cookie)
2220 {
2221         u32 rt_cookie = 0;
2222
2223         if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
2224             rt_cookie != cookie)
2225                 return NULL;
2226
2227         if (rt6_check_expired(rt))
2228                 return NULL;
2229
2230         return &rt->dst;
2231 }
2232
2233 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2234                                             struct fib6_info *from,
2235                                             u32 cookie)
2236 {
2237         if (!__rt6_check_expired(rt) &&
2238             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2239             fib6_check(from, cookie))
2240                 return &rt->dst;
2241         else
2242                 return NULL;
2243 }
2244
2245 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2246 {
2247         struct dst_entry *dst_ret;
2248         struct fib6_info *from;
2249         struct rt6_info *rt;
2250
2251         rt = container_of(dst, struct rt6_info, dst);
2252
2253         rcu_read_lock();
2254
2255         /* All IPV6 dsts are created with ->obsolete set to the value
2256          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2257          * into this function always.
2258          */
2259
2260         from = rcu_dereference(rt->from);
2261
2262         if (from && (rt->rt6i_flags & RTF_PCPU ||
2263             unlikely(!list_empty(&rt->rt6i_uncached))))
2264                 dst_ret = rt6_dst_from_check(rt, from, cookie);
2265         else
2266                 dst_ret = rt6_check(rt, from, cookie);
2267
2268         rcu_read_unlock();
2269
2270         return dst_ret;
2271 }
2272
2273 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2274 {
2275         struct rt6_info *rt = (struct rt6_info *) dst;
2276
2277         if (rt) {
2278                 if (rt->rt6i_flags & RTF_CACHE) {
2279                         rcu_read_lock();
2280                         if (rt6_check_expired(rt)) {
2281                                 rt6_remove_exception_rt(rt);
2282                                 dst = NULL;
2283                         }
2284                         rcu_read_unlock();
2285                 } else {
2286                         dst_release(dst);
2287                         dst = NULL;
2288                 }
2289         }
2290         return dst;
2291 }
2292
2293 static void ip6_link_failure(struct sk_buff *skb)
2294 {
2295         struct rt6_info *rt;
2296
2297         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
2298
2299         rt = (struct rt6_info *) skb_dst(skb);
2300         if (rt) {
2301                 rcu_read_lock();
2302                 if (rt->rt6i_flags & RTF_CACHE) {
2303                         rt6_remove_exception_rt(rt);
2304                 } else {
2305                         struct fib6_info *from;
2306                         struct fib6_node *fn;
2307
2308                         from = rcu_dereference(rt->from);
2309                         if (from) {
2310                                 fn = rcu_dereference(from->fib6_node);
2311                                 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2312                                         fn->fn_sernum = -1;
2313                         }
2314                 }
2315                 rcu_read_unlock();
2316         }
2317 }
2318
2319 static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2320 {
2321         if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2322                 struct fib6_info *from;
2323
2324                 rcu_read_lock();
2325                 from = rcu_dereference(rt0->from);
2326                 if (from)
2327                         rt0->dst.expires = from->expires;
2328                 rcu_read_unlock();
2329         }
2330
2331         dst_set_expires(&rt0->dst, timeout);
2332         rt0->rt6i_flags |= RTF_EXPIRES;
2333 }
2334
2335 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2336 {
2337         struct net *net = dev_net(rt->dst.dev);
2338
2339         dst_metric_set(&rt->dst, RTAX_MTU, mtu);
2340         rt->rt6i_flags |= RTF_MODIFIED;
2341         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2342 }
2343
2344 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2345 {
2346         bool from_set;
2347
2348         rcu_read_lock();
2349         from_set = !!rcu_dereference(rt->from);
2350         rcu_read_unlock();
2351
2352         return !(rt->rt6i_flags & RTF_CACHE) &&
2353                 (rt->rt6i_flags & RTF_PCPU || from_set);
2354 }
2355
2356 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2357                                  const struct ipv6hdr *iph, u32 mtu,
2358                                  bool confirm_neigh)
2359 {
2360         const struct in6_addr *daddr, *saddr;
2361         struct rt6_info *rt6 = (struct rt6_info *)dst;
2362
2363         /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
2364          * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
2365          * [see also comment in rt6_mtu_change_route()]
2366          */
2367
2368         if (iph) {
2369                 daddr = &iph->daddr;
2370                 saddr = &iph->saddr;
2371         } else if (sk) {
2372                 daddr = &sk->sk_v6_daddr;
2373                 saddr = &inet6_sk(sk)->saddr;
2374         } else {
2375                 daddr = NULL;
2376                 saddr = NULL;
2377         }
2378
2379         if (confirm_neigh)
2380                 dst_confirm_neigh(dst, daddr);
2381
2382         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2383         if (mtu >= dst_mtu(dst))
2384                 return;
2385
2386         if (!rt6_cache_allowed_for_pmtu(rt6)) {
2387                 rt6_do_update_pmtu(rt6, mtu);
2388                 /* update rt6_ex->stamp for cache */
2389                 if (rt6->rt6i_flags & RTF_CACHE)
2390                         rt6_update_exception_stamp_rt(rt6);
2391         } else if (daddr) {
2392                 struct fib6_info *from;
2393                 struct rt6_info *nrt6;
2394
2395                 rcu_read_lock();
2396                 from = rcu_dereference(rt6->from);
2397                 if (!from) {
2398                         rcu_read_unlock();
2399                         return;
2400                 }
2401                 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
2402                 if (nrt6) {
2403                         rt6_do_update_pmtu(nrt6, mtu);
2404                         if (rt6_insert_exception(nrt6, from))
2405                                 dst_release_immediate(&nrt6->dst);
2406                 }
2407                 rcu_read_unlock();
2408         }
2409 }
2410
2411 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2412                                struct sk_buff *skb, u32 mtu,
2413                                bool confirm_neigh)
2414 {
2415         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
2416                              confirm_neigh);
2417 }
2418
2419 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2420                      int oif, u32 mark, kuid_t uid)
2421 {
2422         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2423         struct dst_entry *dst;
2424         struct flowi6 fl6;
2425
2426         memset(&fl6, 0, sizeof(fl6));
2427         fl6.flowi6_oif = oif;
2428         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
2429         fl6.daddr = iph->daddr;
2430         fl6.saddr = iph->saddr;
2431         fl6.flowlabel = ip6_flowinfo(iph);
2432         fl6.flowi6_uid = uid;
2433
2434         dst = ip6_route_output(net, NULL, &fl6);
2435         if (!dst->error)
2436                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
2437         dst_release(dst);
2438 }
2439 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2440
2441 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2442 {
2443         int oif = sk->sk_bound_dev_if;
2444         struct dst_entry *dst;
2445
2446         if (!oif && skb->dev)
2447                 oif = l3mdev_master_ifindex(skb->dev);
2448
2449         ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
2450
2451         dst = __sk_dst_get(sk);
2452         if (!dst || !dst->obsolete ||
2453             dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2454                 return;
2455
2456         bh_lock_sock(sk);
2457         if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2458                 ip6_datagram_dst_update(sk, false);
2459         bh_unlock_sock(sk);
2460 }
2461 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2462
2463 void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2464                            const struct flowi6 *fl6)
2465 {
2466 #ifdef CONFIG_IPV6_SUBTREES
2467         struct ipv6_pinfo *np = inet6_sk(sk);
2468 #endif
2469
2470         ip6_dst_store(sk, dst,
2471                       ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2472                       &sk->sk_v6_daddr : NULL,
2473 #ifdef CONFIG_IPV6_SUBTREES
2474                       ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2475                       &np->saddr :
2476 #endif
2477                       NULL);
2478 }
2479
2480 /* Handle redirects */
2481 struct ip6rd_flowi {
2482         struct flowi6 fl6;
2483         struct in6_addr gateway;
2484 };
2485
2486 static struct rt6_info *__ip6_route_redirect(struct net *net,
2487                                              struct fib6_table *table,
2488                                              struct flowi6 *fl6,
2489                                              const struct sk_buff *skb,
2490                                              int flags)
2491 {
2492         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2493         struct rt6_info *ret = NULL, *rt_cache;
2494         struct fib6_info *rt;
2495         struct fib6_node *fn;
2496
2497         /* l3mdev_update_flow overrides oif if the device is enslaved; in
2498          * this case we must match on the real ingress device, so reset it
2499          */
2500         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
2501                 fl6->flowi6_oif = skb->dev->ifindex;
2502
2503         /* Get the "current" route for this destination and
2504          * check if the redirect has come from appropriate router.
2505          *
2506          * RFC 4861 specifies that redirects should only be
2507          * accepted if they come from the nexthop to the target.
2508          * Due to the way the routes are chosen, this notion
2509          * is a bit fuzzy and one might need to check all possible
2510          * routes.
2511          */
2512
2513         rcu_read_lock();
2514         fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2515 restart:
2516         for_each_fib6_node_rt_rcu(fn) {
2517                 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
2518                         continue;
2519                 if (fib6_check_expired(rt))
2520                         continue;
2521                 if (rt->fib6_flags & RTF_REJECT)
2522                         break;
2523                 if (!(rt->fib6_flags & RTF_GATEWAY))
2524                         continue;
2525                 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2526                         continue;
2527                 /* rt_cache's gateway might be different from its 'parent'
2528                  * in the case of an ip redirect.
2529                  * So we keep searching in the exception table if the gateway
2530                  * is different.
2531                  */
2532                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2533                         rt_cache = rt6_find_cached_rt(rt,
2534                                                       &fl6->daddr,
2535                                                       &fl6->saddr);
2536                         if (rt_cache &&
2537                             ipv6_addr_equal(&rdfl->gateway,
2538                                             &rt_cache->rt6i_gateway)) {
2539                                 ret = rt_cache;
2540                                 break;
2541                         }
2542                         continue;
2543                 }
2544                 break;
2545         }
2546
2547         if (!rt)
2548                 rt = net->ipv6.fib6_null_entry;
2549         else if (rt->fib6_flags & RTF_REJECT) {
2550                 ret = net->ipv6.ip6_null_entry;
2551                 goto out;
2552         }
2553
2554         if (rt == net->ipv6.fib6_null_entry) {
2555                 fn = fib6_backtrack(fn, &fl6->saddr);
2556                 if (fn)
2557                         goto restart;
2558         }
2559
2560 out:
2561         if (ret)
2562                 ip6_hold_safe(net, &ret, true);
2563         else
2564                 ret = ip6_create_rt_rcu(rt);
2565
2566         rcu_read_unlock();
2567
2568         trace_fib6_table_lookup(net, rt, table, fl6);
2569         return ret;
2570 };
2571
2572 static struct dst_entry *ip6_route_redirect(struct net *net,
2573                                             const struct flowi6 *fl6,
2574                                             const struct sk_buff *skb,
2575                                             const struct in6_addr *gateway)
2576 {
2577         int flags = RT6_LOOKUP_F_HAS_SADDR;
2578         struct ip6rd_flowi rdfl;
2579
2580         rdfl.fl6 = *fl6;
2581         rdfl.gateway = *gateway;
2582
2583         return fib6_rule_lookup(net, &rdfl.fl6, skb,
2584                                 flags, __ip6_route_redirect);
2585 }
2586
2587 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2588                   kuid_t uid)
2589 {
2590         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2591         struct dst_entry *dst;
2592         struct flowi6 fl6;
2593
2594         memset(&fl6, 0, sizeof(fl6));
2595         fl6.flowi6_iif = LOOPBACK_IFINDEX;
2596         fl6.flowi6_oif = oif;
2597         fl6.flowi6_mark = mark;
2598         fl6.daddr = iph->daddr;
2599         fl6.saddr = iph->saddr;
2600         fl6.flowlabel = ip6_flowinfo(iph);
2601         fl6.flowi6_uid = uid;
2602
2603         dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
2604         rt6_do_redirect(dst, NULL, skb);
2605         dst_release(dst);
2606 }
2607 EXPORT_SYMBOL_GPL(ip6_redirect);
2608
2609 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2610                             u32 mark)
2611 {
2612         const struct ipv6hdr *iph = ipv6_hdr(skb);
2613         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2614         struct dst_entry *dst;
2615         struct flowi6 fl6;
2616
2617         memset(&fl6, 0, sizeof(fl6));
2618         fl6.flowi6_iif = LOOPBACK_IFINDEX;
2619         fl6.flowi6_oif = oif;
2620         fl6.flowi6_mark = mark;
2621         fl6.daddr = msg->dest;
2622         fl6.saddr = iph->daddr;
2623         fl6.flowi6_uid = sock_net_uid(net, NULL);
2624
2625         dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2626         rt6_do_redirect(dst, NULL, skb);
2627         dst_release(dst);
2628 }
2629
2630 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2631 {
2632         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2633                      sk->sk_uid);
2634 }
2635 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2636
2637 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
2638 {
2639         struct net_device *dev = dst->dev;
2640         unsigned int mtu = dst_mtu(dst);
2641         struct net *net = dev_net(dev);
2642
2643         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2644
2645         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2646                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
2647
2648         /*
2649          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2650          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2651          * IPV6_MAXPLEN is also valid and means: "any MSS,
2652          * rely only on pmtu discovery"
2653          */
2654         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2655                 mtu = IPV6_MAXPLEN;
2656         return mtu;
2657 }
2658
2659 static unsigned int ip6_mtu(const struct dst_entry *dst)
2660 {
2661         struct inet6_dev *idev;
2662         unsigned int mtu;
2663
2664         mtu = dst_metric_raw(dst, RTAX_MTU);
2665         if (mtu)
2666                 goto out;
2667
2668         mtu = IPV6_MIN_MTU;
2669
2670         rcu_read_lock();
2671         idev = __in6_dev_get(dst->dev);
2672         if (idev)
2673                 mtu = idev->cnf.mtu6;
2674         rcu_read_unlock();
2675
2676 out:
2677         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2678
2679         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2680 }
2681
2682 /* MTU selection:
2683  * 1. mtu on route is locked - use it
2684  * 2. mtu from nexthop exception
2685  * 3. mtu from egress device
2686  *
2687  * based on ip6_dst_mtu_forward and exception logic of
2688  * rt6_find_cached_rt; called with rcu_read_lock
2689  */
2690 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2691                       struct in6_addr *saddr)
2692 {
2693         struct inet6_dev *idev;
2694         struct rt6_info *rt;
2695         u32 mtu = 0;
2696
2697         if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2698                 mtu = f6i->fib6_pmtu;
2699                 if (mtu)
2700                         goto out;
2701         }
2702
2703         rt = rt6_find_cached_rt(f6i, daddr, saddr);
2704         if (unlikely(rt)) {
2705                 mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
2706         } else {
2707                 struct net_device *dev = fib6_info_nh_dev(f6i);
2708
2709                 mtu = IPV6_MIN_MTU;
2710                 idev = __in6_dev_get(dev);
2711                 if (idev && idev->cnf.mtu6 > mtu)
2712                         mtu = idev->cnf.mtu6;
2713         }
2714
2715         mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2716 out:
2717         return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2718 }
2719
2720 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
2721                                   struct flowi6 *fl6)
2722 {
2723         struct dst_entry *dst;
2724         struct rt6_info *rt;
2725         struct inet6_dev *idev = in6_dev_get(dev);
2726         struct net *net = dev_net(dev);
2727
2728         if (unlikely(!idev))
2729                 return ERR_PTR(-ENODEV);
2730
2731         rt = ip6_dst_alloc(net, dev, 0);
2732         if (unlikely(!rt)) {
2733                 in6_dev_put(idev);
2734                 dst = ERR_PTR(-ENOMEM);
2735                 goto out;
2736         }
2737
2738         rt->dst.flags |= DST_HOST;
2739         rt->dst.input = ip6_input;
2740         rt->dst.output  = ip6_output;
2741         rt->rt6i_gateway  = fl6->daddr;
2742         rt->rt6i_dst.addr = fl6->daddr;
2743         rt->rt6i_dst.plen = 128;
2744         rt->rt6i_idev     = idev;
2745         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
2746
2747         /* Add this dst into uncached_list so that rt6_disable_ip() can
2748          * do proper release of the net_device
2749          */
2750         rt6_uncached_list_add(rt);
2751         atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
2752
2753         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2754
2755 out:
2756         return dst;
2757 }
2758
2759 static int ip6_dst_gc(struct dst_ops *ops)
2760 {
2761         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
2762         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2763         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2764         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2765         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2766         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2767         int entries;
2768
2769         entries = dst_entries_get_fast(ops);
2770         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2771             entries <= rt_max_size)
2772                 goto out;
2773
2774         net->ipv6.ip6_rt_gc_expire++;
2775         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2776         entries = dst_entries_get_slow(ops);
2777         if (entries < ops->gc_thresh)
2778                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
2779 out:
2780         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2781         return entries > rt_max_size;
2782 }
2783
2784 static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2785                                struct fib6_config *cfg)
2786 {
2787         struct dst_metrics *p;
2788
2789         if (!cfg->fc_mx)
2790                 return 0;
2791
2792         p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2793         if (unlikely(!p))
2794                 return -ENOMEM;
2795
2796         refcount_set(&p->refcnt, 1);
2797         rt->fib6_metrics = p;
2798
2799         return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
2800 }
2801
2802 static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2803                                             struct fib6_config *cfg,
2804                                             const struct in6_addr *gw_addr,
2805                                             u32 tbid, int flags)
2806 {
2807         struct flowi6 fl6 = {
2808                 .flowi6_oif = cfg->fc_ifindex,
2809                 .daddr = *gw_addr,
2810                 .saddr = cfg->fc_prefsrc,
2811         };
2812         struct fib6_table *table;
2813         struct rt6_info *rt;
2814
2815         table = fib6_get_table(net, tbid);
2816         if (!table)
2817                 return NULL;
2818
2819         if (!ipv6_addr_any(&cfg->fc_prefsrc))
2820                 flags |= RT6_LOOKUP_F_HAS_SADDR;
2821
2822         flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2823         rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
2824
2825         /* if table lookup failed, fall back to full lookup */
2826         if (rt == net->ipv6.ip6_null_entry) {
2827                 ip6_rt_put(rt);
2828                 rt = NULL;
2829         }
2830
2831         return rt;
2832 }
2833
2834 static int ip6_route_check_nh_onlink(struct net *net,
2835                                      struct fib6_config *cfg,
2836                                      const struct net_device *dev,
2837                                      struct netlink_ext_ack *extack)
2838 {
2839         u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2840         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2841         u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2842         struct fib6_info *from;
2843         struct rt6_info *grt;
2844         int err;
2845
2846         err = 0;
2847         grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2848         if (grt) {
2849                 rcu_read_lock();
2850                 from = rcu_dereference(grt->from);
2851                 if (!grt->dst.error &&
2852                     /* ignore match if it is the default route */
2853                     from && !ipv6_addr_any(&from->fib6_dst.addr) &&
2854                     (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
2855                         NL_SET_ERR_MSG(extack,
2856                                        "Nexthop has invalid gateway or device mismatch");
2857                         err = -EINVAL;
2858                 }
2859                 rcu_read_unlock();
2860
2861                 ip6_rt_put(grt);
2862         }
2863
2864         return err;
2865 }
2866
/* Validate the gateway in @cfg by looking up the route that leads to it.
 *
 * When cfg->fc_table is set that table is tried first; a hit there is
 * discarded if it is itself a gateway route or uses a device other than the
 * one supplied.  If no usable route came out of the table, rt6_lookup() is
 * used instead.
 *
 * If the caller supplied a device in *_dev the route found must use it.
 * If not, *_dev and *idev are filled in from the route found and a
 * reference is taken on each.
 *
 * Returns 0 when the route to the gateway is not a gateway route itself,
 * -EHOSTUNREACH otherwise (also when no route was found or the device did
 * not match).
 */
2867 static int ip6_route_check_nh(struct net *net,
2868                               struct fib6_config *cfg,
2869                               struct net_device **_dev,
2870                               struct inet6_dev **idev)
2871 {
2872         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2873         struct net_device *dev = _dev ? *_dev : NULL;
2874         struct rt6_info *grt = NULL;
2875         int err = -EHOSTUNREACH;
2876
2877         if (cfg->fc_table) {
2878                 int flags = RT6_LOOKUP_F_IFACE;
2879
2880                 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2881                                           cfg->fc_table, flags);
2882                 if (grt) {
             /* reject the table hit: gateway route or wrong device */
2883                         if (grt->rt6i_flags & RTF_GATEWAY ||
2884                             (dev && dev != grt->dst.dev)) {
2885                                 ip6_rt_put(grt);
2886                                 grt = NULL;
2887                         }
2888                 }
2889         }
2890
     /* no usable route from the table: fall back to rt6_lookup() */
2891         if (!grt)
2892                 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
2893
2894         if (!grt)
2895                 goto out;
2896
2897         if (dev) {
2898                 if (dev != grt->dst.dev) {
2899                         ip6_rt_put(grt);
2900                         goto out;
2901                 }
2902         } else {
             /* No device given: adopt the route's device and idev.  The
              * references are taken before the RTF_GATEWAY check below,
              * so they are held even when -EHOSTUNREACH is returned.
              */
2903                 *_dev = dev = grt->dst.dev;
2904                 *idev = grt->rt6i_idev;
2905                 dev_hold(dev);
2906                 in6_dev_hold(grt->rt6i_idev);
2907         }
2908
2909         if (!(grt->rt6i_flags & RTF_GATEWAY))
2910                 err = 0;
2911
2912         ip6_rt_put(grt);
2913
2914 out:
2915         return err;
2916 }
2917
/* Validate cfg->fc_gateway as the nexthop of a route being created.
 *
 * Checks, in order:
 *  - the gateway must not be a local address (checked up front when a
 *    device is already known, otherwise once one has been resolved);
 *  - a gateway that is not a link-local unicast address must at least be
 *    unicast or IPv4-mapped, and must pass ip6_route_check_nh_onlink()
 *    (RTNH_F_ONLINK set) or ip6_route_check_nh() (which may fill in *_dev
 *    and *idev);
 *  - an egress device must be known by then and must not be loopback.
 *
 * Returns 0 on success, the helper's error if a nexthop check fails, and
 * -EINVAL for every failure detected here.
 */
2918 static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2919                            struct net_device **_dev, struct inet6_dev **idev,
2920                            struct netlink_ext_ack *extack)
2921 {
2922         const struct in6_addr *gw_addr = &cfg->fc_gateway;
2923         int gwa_type = ipv6_addr_type(gw_addr);
     /* skip_dev is false only for link-local gateways; it is forwarded to
      * ipv6_chk_addr_and_flags() - presumably so that the device is ignored
      * for other address types; confirm against that helper.
      */
2924         bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
2925         const struct net_device *dev = *_dev;
     /* remember whether the local-address check has to wait for a device */
2926         bool need_addr_check = !dev;
2927         int err = -EINVAL;
2928
2929         /* if gw_addr is local we will fail to detect this in case
2930          * address is still TENTATIVE (DAD in progress). rt6_lookup()
2931          * will return already-added prefix route via interface that
2932          * prefix route was assigned to, which might be non-loopback.
2933          */
2934         if (dev &&
2935             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2936                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2937                 goto out;
2938         }
2939
2940         if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2941                 /* IPv6 strictly inhibits using not link-local
2942                  * addresses as nexthop address.
2943                  * Otherwise, router will not able to send redirects.
2944                  * It is very good, but in some (rare!) circumstances
2945                  * (SIT, PtP, NBMA NOARP links) it is handy to allow
2946                  * some exceptions. --ANK
2947                  * We allow IPv4-mapped nexthops to support RFC4798-type
2948                  * addressing
2949                  */
2950                 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2951                         NL_SET_ERR_MSG(extack, "Invalid gateway address");
2952                         goto out;
2953                 }
2954
2955                 if (cfg->fc_flags & RTNH_F_ONLINK)
2956                         err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2957                 else
2958                         err = ip6_route_check_nh(net, cfg, _dev, idev);
2959
2960                 if (err)
2961                         goto out;
2962         }
2963
2964         /* reload in case device was changed */
2965         dev = *_dev;
2966
     /* err may be 0 from the nexthop check; the checks below fail with -EINVAL */
2967         err = -EINVAL;
2968         if (!dev) {
2969                 NL_SET_ERR_MSG(extack, "Egress device not specified");
2970                 goto out;
2971         } else if (dev->flags & IFF_LOOPBACK) {
2972                 NL_SET_ERR_MSG(extack,
2973                                "Egress device can not be loopback device for this route");
2974                 goto out;
2975         }
2976
2977         /* if we did not check gw_addr above, do so now that the
2978          * egress device has been resolved.
2979          */
2980         if (need_addr_check &&
2981             ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2982                 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2983                 goto out;
2984         }
2985
2986         err = 0;
2987 out:
2988         return err;
2989 }
2990
/* Build a fib6_info from a route configuration without inserting it.
 *
 * @cfg:       route description; some fields are normalised in place:
 *             fc_metric (0 becomes IP6_RT_PRIO_USER), fc_protocol
 *             (RTPROT_UNSPEC becomes RTPROT_BOOT) and fc_nlinfo.nl_net
 *             (set to the namespace of the egress device).
 * @gfp_flags: allocation flags passed to fib6_info_alloc().
 * @extack:    receives a message for most validation failures.
 *
 * Routes that carry RTF_REJECT, and non-local routes to a non-loopback
 * destination whose device is a loopback device, are turned into reject
 * routes bound to the namespace's loopback device.
 *
 * On success the new entry is returned; the device pointer is stored in
 * rt->fib6_nh.nh_dev without dropping the reference taken on it, and only
 * the inet6_dev reference is released.  On failure the device and
 * inet6_dev references are dropped, the partially built entry is released
 * and an ERR_PTR() is returned.
 */
2991 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2992                                               gfp_t gfp_flags,
2993                                               struct netlink_ext_ack *extack)
2994 {
2995         struct net *net = cfg->fc_nlinfo.nl_net;
2996         struct fib6_info *rt = NULL;
2997         struct net_device *dev = NULL;
2998         struct inet6_dev *idev = NULL;
2999         struct fib6_table *table;
3000         int addr_type;
3001         int err = -EINVAL;
3002
3003         /* RTF_PCPU is an internal flag; can not be set by userspace */
3004         if (cfg->fc_flags & RTF_PCPU) {
3005                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3006                 goto out;
3007         }
3008
3009         /* RTF_CACHE is an internal flag; can not be set by userspace */
3010         if (cfg->fc_flags & RTF_CACHE) {
3011                 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
3012                 goto out;
3013         }
3014
3015         if (cfg->fc_type > RTN_MAX) {
3016                 NL_SET_ERR_MSG(extack, "Invalid route type");
3017                 goto out;
3018         }
3019
3020         if (cfg->fc_dst_len > 128) {
3021                 NL_SET_ERR_MSG(extack, "Invalid prefix length");
3022                 goto out;
3023         }
3024         if (cfg->fc_src_len > 128) {
3025                 NL_SET_ERR_MSG(extack, "Invalid source address length");
3026                 goto out;
3027         }
3028 #ifndef CONFIG_IPV6_SUBTREES
3029         if (cfg->fc_src_len) {
3030                 NL_SET_ERR_MSG(extack,
3031                                "Specifying source address requires IPV6_SUBTREES to be enabled");
3032                 goto out;
3033         }
3034 #endif
     /* an explicit ifindex pins both the device and its inet6_dev */
3035         if (cfg->fc_ifindex) {
3036                 err = -ENODEV;
3037                 dev = dev_get_by_index(net, cfg->fc_ifindex);
3038                 if (!dev)
3039                         goto out;
3040                 idev = in6_dev_get(dev);
3041                 if (!idev)
3042                         goto out;
3043         }
3044
3045         if (cfg->fc_metric == 0)
3046                 cfg->fc_metric = IP6_RT_PRIO_USER;
3047
3048         if (cfg->fc_flags & RTNH_F_ONLINK) {
3049                 if (!dev) {
3050                         NL_SET_ERR_MSG(extack,
3051                                        "Nexthop device required for onlink");
3052                         err = -ENODEV;
3053                         goto out;
3054                 }
3055
3056                 if (!(dev->flags & IFF_UP)) {
3057                         NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3058                         err = -ENETDOWN;
3059                         goto out;
3060                 }
3061         }
3062
     /* A netlink request without NLM_F_CREATE first tries the existing
      * table; if there is none a warning is logged and the table is
      * created anyway.
      */
3063         err = -ENOBUFS;
3064         if (cfg->fc_nlinfo.nlh &&
3065             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3066                 table = fib6_get_table(net, cfg->fc_table);
3067                 if (!table) {
3068                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3069                         table = fib6_new_table(net, cfg->fc_table);
3070                 }
3071         } else {
3072                 table = fib6_new_table(net, cfg->fc_table);
3073         }
3074
3075         if (!table)
3076                 goto out;
3077
3078         err = -ENOMEM;
3079         rt = fib6_info_alloc(gfp_flags);
3080         if (!rt)
3081                 goto out;
3082
3083 #ifdef CONFIG_IPV6_ROUTER_PREF
3084         rt->last_probe = jiffies;
3085 #endif
3086         if (cfg->fc_flags & RTF_ADDRCONF)
3087                 rt->dst_nocount = true;
3088
3089         err = ip6_convert_metrics(net, rt, cfg);
3090         if (err < 0)
3091                 goto out;
3092
3093         if (cfg->fc_flags & RTF_EXPIRES)
3094                 fib6_set_expires(rt, jiffies +
3095                                 clock_t_to_jiffies(cfg->fc_expires));
3096         else
3097                 fib6_clean_expires(rt);
3098
3099         if (cfg->fc_protocol == RTPROT_UNSPEC)
3100                 cfg->fc_protocol = RTPROT_BOOT;
3101         rt->fib6_protocol = cfg->fc_protocol;
3102
3103         addr_type = ipv6_addr_type(&cfg->fc_dst);
3104
3105         if (cfg->fc_encap) {
3106                 struct lwtunnel_state *lwtstate;
3107
3108                 err = lwtunnel_build_state(cfg->fc_encap_type,
3109                                            cfg->fc_encap, AF_INET6, cfg,
3110                                            &lwtstate, extack);
3111                 if (err)
3112                         goto out;
3113                 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
3114         }
3115
     /* store only the prefix bits of the destination */
3116         ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3117         rt->fib6_dst.plen = cfg->fc_dst_len;
3118         if (rt->fib6_dst.plen == 128)
3119                 rt->dst_host = true;
3120
3121 #ifdef CONFIG_IPV6_SUBTREES
3122         ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3123         rt->fib6_src.plen = cfg->fc_src_len;
3124 #endif
3125
3126         rt->fib6_metric = cfg->fc_metric;
3127         rt->fib6_nh.nh_weight = 1;
3128
3129         rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
3130
3131         /* We cannot add true routes via loopback here,
3132            they would result in kernel looping; promote them to reject routes
3133          */
3134         if ((cfg->fc_flags & RTF_REJECT) ||
3135             (dev && (dev->flags & IFF_LOOPBACK) &&
3136              !(addr_type & IPV6_ADDR_LOOPBACK) &&
3137              !(cfg->fc_flags & RTF_LOCAL))) {
3138                 /* hold loopback dev/idev if we haven't done so. */
3139                 if (dev != net->loopback_dev) {
3140                         if (dev) {
3141                                 dev_put(dev);
3142                                 in6_dev_put(idev);
3143                         }
3144                         dev = net->loopback_dev;
3145                         dev_hold(dev);
3146                         idev = in6_dev_get(dev);
3147                         if (!idev) {
3148                                 err = -ENODEV;
3149                                 goto out;
3150                         }
3151                 }
             /* reject routes skip the gateway, device and prefsrc checks */
3152                 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
3153                 goto install_route;
3154         }
3155
3156         if (cfg->fc_flags & RTF_GATEWAY) {
             /* may fill in dev and idev when no ifindex was given */
3157                 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3158                 if (err)
3159                         goto out;
3160
3161                 rt->fib6_nh.nh_gw = cfg->fc_gateway;
3162         }
3163
3164         err = -ENODEV;
3165         if (!dev)
3166                 goto out;
3167
3168         if (idev->cnf.disable_ipv6) {
3169                 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3170                 err = -EACCES;
3171                 goto out;
3172         }
3173
3174         if (!(dev->flags & IFF_UP)) {
3175                 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3176                 err = -ENETDOWN;
3177                 goto out;
3178         }
3179
     /* a preferred source, if given, must pass ipv6_chk_addr() for dev */
3180         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3181                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3182                         NL_SET_ERR_MSG(extack, "Invalid source address");
3183                         err = -EINVAL;
3184                         goto out;
3185                 }
3186                 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3187                 rt->fib6_prefsrc.plen = 128;
3188         } else
3189                 rt->fib6_prefsrc.plen = 0;
3190
3191         rt->fib6_flags = cfg->fc_flags;
3192
3193 install_route:
     /* mark the nexthop link-down when the carrier is off, except for
      * local and anycast routes
      */
3194         if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3195             !netif_carrier_ok(dev))
3196                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3197         rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
     /* the device reference taken above is not dropped on this path */
3198         rt->fib6_nh.nh_dev = dev;
3199         rt->fib6_table = table;
3200
3201         cfg->fc_nlinfo.nl_net = dev_net(dev);
3202
3203         if (idev)
3204                 in6_dev_put(idev);
3205
3206         return rt;
3207 out:
     /* failure: drop whatever references were taken and release rt */
3208         if (dev)
3209                 dev_put(dev);
3210         if (idev)
3211                 in6_dev_put(idev);
3212
3213         fib6_info_release(rt);
3214         return ERR_PTR(err);
3215 }
3216
3217 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3218                   struct netlink_ext_ack *extack)
3219 {
3220         struct fib6_info *rt;
3221         int err;
3222
3223         rt = ip6_route_info_create(cfg, gfp_flags, extack);
3224         if (IS_ERR(rt))
3225                 return PTR_ERR(rt);
3226
3227         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
3228         fib6_info_release(rt);
3229
3230         return err;
3231 }
3232
3233 static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
3234 {
3235         struct net *net = info->nl_net;
3236         struct fib6_table *table;
3237         int err;
3238
3239         if (rt == net->ipv6.fib6_null_entry) {
3240                 err = -ENOENT;
3241                 goto out;
3242         }
3243
3244         table = rt->fib6_table;
3245         spin_lock_bh(&table->tb6_lock);
3246         err = fib6_del(rt, info);
3247         spin_unlock_bh(&table->tb6_lock);
3248
3249 out:
3250         fib6_info_release(rt);
3251         return err;
3252 }
3253
3254 int ip6_del_rt(struct net *net, struct fib6_info *rt)
3255 {
3256         struct nl_info info = { .nl_net = net };
3257
3258         return __ip6_del_rt(rt, &info);
3259 }
3260
/* Delete @rt and, when it has siblings and cfg->fc_delete_all_nh is set,
 * every sibling as well, all under the table lock.
 *
 * For the multi-hop case a single RTM_DELROUTE message is built up front.
 * If that succeeds info->skip_notify is set to 1 and the message is sent
 * with rtnl_notify() after the lock has been dropped.
 *
 * fib6_info_release() is called on @rt on every path.
 *
 * Returns -ENOENT for the namespace's null entry, the error of the first
 * sibling deletion that fails, or otherwise the result of deleting @rt.
 */
3261 static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
3262 {
3263         struct nl_info *info = &cfg->fc_nlinfo;
3264         struct net *net = info->nl_net;
3265         struct sk_buff *skb = NULL;
3266         struct fib6_table *table;
3267         int err = -ENOENT;
3268
3269         if (rt == net->ipv6.fib6_null_entry)
3270                 goto out_put;
3271         table = rt->fib6_table;
3272         spin_lock_bh(&table->tb6_lock);
3273
3274         if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
3275                 struct fib6_info *sibling, *next_sibling;
3276
3277                 /* prefer to send a single notification with all hops */
3278                 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3279                 if (skb) {
3280                         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3281
3282                         if (rt6_fill_node(net, skb, rt, NULL,
3283                                           NULL, NULL, 0, RTM_DELROUTE,
3284                                           info->portid, seq, 0) < 0) {
                             /* could not build it: skip_notify stays unset */
3285                                 kfree_skb(skb);
3286                                 skb = NULL;
3287                         } else
3288                                 info->skip_notify = 1;
3289                 }
3290
3291                 list_for_each_entry_safe(sibling, next_sibling,
3292                                          &rt->fib6_siblings,
3293                                          fib6_siblings) {
3294                         err = fib6_del(sibling, info);
3295                         if (err)
3296                                 goto out_unlock;
3297                 }
3298         }
3299
3300         err = fib6_del(rt, info);
3301 out_unlock:
3302         spin_unlock_bh(&table->tb6_lock);
3303 out_put:
3304         fib6_info_release(rt);
3305
     /* NOTE(review): a prepared message is sent even when a deletion above
      * failed - confirm that this is intended.
      */
3306         if (skb) {
3307                 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3308                             info->nlh, gfp_any());
3309         }
3310         return err;
3311 }
3312
3313 static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3314 {
3315         int rc = -ESRCH;
3316
3317         if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3318                 goto out;
3319
3320         if (cfg->fc_flags & RTF_GATEWAY &&
3321             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3322                 goto out;
3323
3324         rc = rt6_remove_exception_rt(rt);
3325 out:
3326         return rc;
3327 }
3328
/* Delete the route described by @cfg.
 *
 * The node for the destination/source prefix is located in table
 * cfg->fc_table and its routes are walked under rcu_read_lock().
 *
 * With RTF_CACHE set, each route's cached exception for fc_dst/fc_src is
 * passed to ip6_del_cached_rt(); the first result other than -ESRCH is
 * returned.
 *
 * Otherwise the first route that passes every filter is deleted.  A zero
 * fc_ifindex, fc_metric or fc_protocol matches anything, and the gateway is
 * only compared when RTF_GATEWAY is set.  If a gateway was given only that
 * one hop is removed; if not, __ip6_del_rt_siblings() is used.
 *
 * Returns -ESRCH when the table does not exist or nothing matched,
 * otherwise the result of the deletion helper that was called.
 */
3329 static int ip6_route_del(struct fib6_config *cfg,
3330                          struct netlink_ext_ack *extack)
3331 {
3332         struct rt6_info *rt_cache;
3333         struct fib6_table *table;
3334         struct fib6_info *rt;
3335         struct fib6_node *fn;
3336         int err = -ESRCH;
3337
3338         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3339         if (!table) {
3340                 NL_SET_ERR_MSG(extack, "FIB table does not exist");
3341                 return err;
3342         }
3343
3344         rcu_read_lock();
3345
3346         fn = fib6_locate(&table->tb6_root,
3347                          &cfg->fc_dst, cfg->fc_dst_len,
3348                          &cfg->fc_src, cfg->fc_src_len,
3349                          !(cfg->fc_flags & RTF_CACHE));
3350
3351         if (fn) {
             /* the loop macro walks the node's routes through 'rt' */
3352                 for_each_fib6_node_rt_rcu(fn) {
3353                         if (cfg->fc_flags & RTF_CACHE) {
3354                                 int rc;
3355
3356                                 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3357                                                               &cfg->fc_src);
3358                                 if (rt_cache) {
3359                                         rc = ip6_del_cached_rt(rt_cache, cfg);
                                     /* -ESRCH means "filters did not match": keep looking */
3360                                         if (rc != -ESRCH) {
3361                                                 rcu_read_unlock();
3362                                                 return rc;
3363                                         }
3364                                 }
3365                                 continue;
3366                         }
3367                         if (cfg->fc_ifindex &&
3368                             (!rt->fib6_nh.nh_dev ||
3369                              rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
3370                                 continue;
3371                         if (cfg->fc_flags & RTF_GATEWAY &&
3372                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
3373                                 continue;
3374                         if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
3375                                 continue;
3376                         if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3377                                 continue;
                     /* skip the entry if a reference cannot be taken */
3378                         if (!fib6_info_hold_safe(rt))
3379                                 continue;
3380                         rcu_read_unlock();
3381
3382                         /* if gateway was specified only delete the one hop */
3383                         if (cfg->fc_flags & RTF_GATEWAY)
3384                                 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3385
3386                         return __ip6_del_rt_siblings(rt, cfg);
3387                 }
3388         }
3389         rcu_read_unlock();
3390
3391         return err;
3392 }
3393
3394 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3395 {
3396         struct netevent_redirect netevent;
3397         struct rt6_info *rt, *nrt = NULL;
3398         struct ndisc_options ndopts;
3399         struct inet6_dev *in6_dev;
3400         struct neighbour *neigh;
3401         struct fib6_info *from;
3402         struct rd_msg *msg;
3403         int optlen, on_link;
3404         u8 *lladdr;
3405
3406         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
3407         optlen -= sizeof(*msg);
3408
3409         if (optlen < 0) {
3410                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3411                 return;
3412         }
3413
3414         msg = (struct rd_msg *)icmp6_hdr(skb);
3415
3416         if (ipv6_addr_is_multicast(&msg->dest)) {
3417                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3418                 return;
3419         }
3420
3421         on_link = 0;
3422         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3423                 on_link = 1;
3424         } else if (ipv6_addr_type(&msg->target) !=
3425                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
3426                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3427                 return;
3428         }
3429
3430         in6_dev = __in6_dev_get(skb->dev);
3431         if (!in6_dev)
3432                 return;
3433         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3434                 return;
3435
3436         /* RFC2461 8.1:
3437          *      The IP source address of the Redirect MUST be the same as the current
3438          *      first-hop router for the specified ICMP Destination Address.
3439          */
3440
3441         if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3442                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3443                 return;
3444         }
3445
3446         lladdr = NULL;
3447         if (ndopts.nd_opts_tgt_lladdr) {
3448                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3449                                              skb->dev);
3450                 if (!lladdr) {
3451                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3452                         return;
3453                 }
3454         }
3455
3456         rt = (struct rt6_info *) dst;
3457         if (rt->rt6i_flags & RTF_REJECT) {
3458                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3459                 return;
3460         }
3461
3462         /* Redirect received -> path was valid.
3463          * Look, redirects are sent only in response to data packets,
3464          * so that this nexthop apparently is reachable. --ANK
3465          */
3466         dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
3467
3468         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3469         if (!neigh)
3470                 return;
3471
3472         /*
3473          *      We have finally decided to accept it.
3474          */
3475
3476         ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
3477                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
3478                      NEIGH_UPDATE_F_OVERRIDE|
3479                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3480                                      NEIGH_UPDATE_F_ISROUTER)),
3481                      NDISC_REDIRECT, &ndopts);
3482
3483         rcu_read_lock();
3484         from = rcu_dereference(rt->from);
3485         if (!from)
3486                 goto out;
3487
3488         nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
3489         if (!nrt)
3490                 goto out;
3491
3492         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3493         if (on_link)
3494                 nrt->rt6i_flags &= ~RTF_GATEWAY;
3495
3496         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
3497
3498         /* rt6_insert_exception() will take care of duplicated exceptions */
3499         if (rt6_insert_exception(nrt, from)) {
3500                 dst_release_immediate(&nrt->dst);
3501                 goto out;
3502         }
3503
3504         netevent.old = &rt->dst;
3505         netevent.new = &nrt->dst;
3506         netevent.daddr = &msg->dest;
3507         netevent.neigh = neigh;
3508         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3509
3510 out:
3511         rcu_read_unlock();
3512         neigh_release(neigh);
3513 }
3514
3515 #ifdef CONFIG_IPV6_ROUTE_INFO
3516 static struct fib6_info *rt6_get_route_info(struct net *net,
3517                                            const struct in6_addr *prefix, int prefixlen,
3518                                            const struct in6_addr *gwaddr,
3519                                            struct net_device *dev)
3520 {
3521         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3522         int ifindex = dev->ifindex;
3523         struct fib6_node *fn;
3524         struct fib6_info *rt = NULL;
3525         struct fib6_table *table;
3526
3527         table = fib6_get_table(net, tb_id);
3528         if (!table)
3529                 return NULL;
3530
3531         rcu_read_lock();
3532         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
3533         if (!fn)
3534                 goto out;
3535
3536         for_each_fib6_node_rt_rcu(fn) {
3537                 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
3538                         continue;
3539                 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3540                         continue;
3541                 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
3542                         continue;
3543                 if (!fib6_info_hold_safe(rt))
3544                         continue;
3545                 break;
3546         }
3547 out:
3548         rcu_read_unlock();
3549         return rt;
3550 }
3551
3552 static struct fib6_info *rt6_add_route_info(struct net *net,
3553                                            const struct in6_addr *prefix, int prefixlen,
3554                                            const struct in6_addr *gwaddr,
3555                                            struct net_device *dev,
3556                                            unsigned int pref)
3557 {
3558         struct fib6_config cfg = {
3559                 .fc_metric      = IP6_RT_PRIO_USER,
3560                 .fc_ifindex     = dev->ifindex,
3561                 .fc_dst_len     = prefixlen,
3562                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3563                                   RTF_UP | RTF_PREF(pref),
3564                 .fc_protocol = RTPROT_RA,
3565                 .fc_type = RTN_UNICAST,
3566                 .fc_nlinfo.portid = 0,
3567                 .fc_nlinfo.nlh = NULL,
3568                 .fc_nlinfo.nl_net = net,
3569         };
3570
3571         cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
3572         cfg.fc_dst = *prefix;
3573         cfg.fc_gateway = *gwaddr;
3574
3575         /* We should treat it as a default route if prefix length is 0. */
3576         if (!prefixlen)
3577                 cfg.fc_flags |= RTF_DEFAULT;
3578
3579         ip6_route_add(&cfg, GFP_ATOMIC, NULL);
3580
3581         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
3582 }
3583 #endif
3584
3585 struct fib6_info *rt6_get_dflt_router(struct net *net,
3586                                      const struct in6_addr *addr,
3587                                      struct net_device *dev)
3588 {
3589         u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
3590         struct fib6_info *rt;
3591         struct fib6_table *table;
3592
3593         table = fib6_get_table(net, tb_id);
3594         if (!table)
3595                 return NULL;
3596
3597         rcu_read_lock();
3598         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3599                 if (dev == rt->fib6_nh.nh_dev &&
3600                     ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3601                     ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
3602                         break;
3603         }
3604         if (rt && !fib6_info_hold_safe(rt))
3605                 rt = NULL;
3606         rcu_read_unlock();
3607         return rt;
3608 }
3609
3610 struct fib6_info *rt6_add_dflt_router(struct net *net,
3611                                      const struct in6_addr *gwaddr,
3612                                      struct net_device *dev,
3613                                      unsigned int pref)
3614 {
3615         struct fib6_config cfg = {
3616                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3617                 .fc_metric      = IP6_RT_PRIO_USER,
3618                 .fc_ifindex     = dev->ifindex,
3619                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3620                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3621                 .fc_protocol = RTPROT_RA,
3622                 .fc_type = RTN_UNICAST,
3623                 .fc_nlinfo.portid = 0,
3624                 .fc_nlinfo.nlh = NULL,
3625                 .fc_nlinfo.nl_net = net,
3626         };
3627
3628         cfg.fc_gateway = *gwaddr;
3629
3630         if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3631                 struct fib6_table *table;
3632
3633                 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3634                 if (table)
3635                         table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3636         }
3637
3638         return rt6_get_dflt_router(net, gwaddr, dev);
3639 }
3640
3641 static void __rt6_purge_dflt_routers(struct net *net,
3642                                      struct fib6_table *table)
3643 {
3644         struct fib6_info *rt;
3645
3646 restart:
3647         rcu_read_lock();
3648         for_each_fib6_node_rt_rcu(&table->tb6_root) {
3649                 struct net_device *dev = fib6_info_nh_dev(rt);
3650                 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3651
3652                 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3653                     (!idev || idev->cnf.accept_ra != 2) &&
3654                     fib6_info_hold_safe(rt)) {
3655                         rcu_read_unlock();
3656                         ip6_del_rt(net, rt);
3657                         goto restart;
3658                 }
3659         }
3660         rcu_read_unlock();
3661
3662         table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3663 }
3664
3665 void rt6_purge_dflt_routers(struct net *net)
3666 {
3667         struct fib6_table *table;
3668         struct hlist_head *head;
3669         unsigned int h;
3670
3671         rcu_read_lock();
3672
3673         for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3674                 head = &net->ipv6.fib_table_hash[h];
3675                 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3676                         if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3677                                 __rt6_purge_dflt_routers(net, table);
3678                 }
3679         }
3680
3681         rcu_read_unlock();
3682 }
3683
3684 static void rtmsg_to_fib6_config(struct net *net,
3685                                  struct in6_rtmsg *rtmsg,
3686                                  struct fib6_config *cfg)
3687 {
3688         memset(cfg, 0, sizeof(*cfg));
3689
3690         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3691                          : RT6_TABLE_MAIN;
3692         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3693         cfg->fc_metric = rtmsg->rtmsg_metric;
3694         cfg->fc_expires = rtmsg->rtmsg_info;
3695         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3696         cfg->fc_src_len = rtmsg->rtmsg_src_len;
3697         cfg->fc_flags = rtmsg->rtmsg_flags;
3698         cfg->fc_type = rtmsg->rtmsg_type;
3699
3700         cfg->fc_nlinfo.nl_net = net;
3701
3702         cfg->fc_dst = rtmsg->rtmsg_dst;
3703         cfg->fc_src = rtmsg->rtmsg_src;
3704         cfg->fc_gateway = rtmsg->rtmsg_gateway;
3705 }
3706
3707 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3708 {
3709         struct fib6_config cfg;
3710         struct in6_rtmsg rtmsg;
3711         int err;
3712
3713         switch (cmd) {
3714         case SIOCADDRT:         /* Add a route */
3715         case SIOCDELRT:         /* Delete a route */
3716                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3717                         return -EPERM;
3718                 err = copy_from_user(&rtmsg, arg,
3719                                      sizeof(struct in6_rtmsg));
3720                 if (err)
3721                         return -EFAULT;
3722
3723                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
3724
3725                 rtnl_lock();
3726                 switch (cmd) {
3727                 case SIOCADDRT:
3728                         err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
3729                         break;
3730                 case SIOCDELRT:
3731                         err = ip6_route_del(&cfg, NULL);
3732                         break;
3733                 default:
3734                         err = -EINVAL;
3735                 }
3736                 rtnl_unlock();
3737
3738                 return err;
3739         }
3740
3741         return -EINVAL;
3742 }
3743
3744 /*
3745  *      Drop the packet on the floor
3746  */
3747
3748 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
3749 {
3750         int type;
3751         struct dst_entry *dst = skb_dst(skb);
3752         switch (ipstats_mib_noroutes) {
3753         case IPSTATS_MIB_INNOROUTES:
3754                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
3755                 if (type == IPV6_ADDR_ANY) {
3756                         IP6_INC_STATS(dev_net(dst->dev),
3757                                       __in6_dev_get_safely(skb->dev),
3758                                       IPSTATS_MIB_INADDRERRORS);
3759                         break;
3760                 }
3761                 /* FALLTHROUGH */
3762         case IPSTATS_MIB_OUTNOROUTES:
3763                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3764                               ipstats_mib_noroutes);
3765                 break;
3766         }
3767         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
3768         kfree_skb(skb);
3769         return 0;
3770 }
3771
/* Drop @skb through ip6_pkt_drop() with ICMPv6 code ICMPV6_NOROUTE,
 * accounted under IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_discard(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
3776
3777 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3778 {
3779         skb->dev = skb_dst(skb)->dev;
3780         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
3781 }
3782
/* Drop @skb through ip6_pkt_drop() with ICMPv6 code ICMPV6_ADM_PROHIBITED,
 * accounted under IPSTATS_MIB_INNOROUTES.
 */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
3787
3788 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
3789 {
3790         skb->dev = skb_dst(skb)->dev;
3791         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
3792 }
3793
3794 /*
3795  *      Allocate a dst for local (unicast / anycast) address.
3796  */
3797
3798 struct fib6_info *addrconf_f6i_alloc(struct net *net,
3799                                      struct inet6_dev *idev,
3800                                      const struct in6_addr *addr,
3801                                      bool anycast, gfp_t gfp_flags)
3802 {
3803         u32 tb_id;
3804         struct net_device *dev = idev->dev;
3805         struct fib6_info *f6i;
3806
3807         f6i = fib6_info_alloc(gfp_flags);
3808         if (!f6i)
3809                 return ERR_PTR(-ENOMEM);
3810
3811         f6i->dst_nocount = true;
3812         f6i->dst_host = true;
3813         f6i->fib6_protocol = RTPROT_KERNEL;
3814         f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3815         if (anycast) {
3816                 f6i->fib6_type = RTN_ANYCAST;
3817                 f6i->fib6_flags |= RTF_ANYCAST;
3818         } else {
3819                 f6i->fib6_type = RTN_LOCAL;
3820                 f6i->fib6_flags |= RTF_LOCAL;
3821         }
3822
3823         f6i->fib6_nh.nh_gw = *addr;
3824         dev_hold(dev);
3825         f6i->fib6_nh.nh_dev = dev;
3826         f6i->fib6_dst.addr = *addr;
3827         f6i->fib6_dst.plen = 128;
3828         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3829         f6i->fib6_table = fib6_get_table(net, tb_id);
3830
3831         return f6i;
3832 }
3833
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
        /* only routes whose nexthop device is this one are touched;
         * NULL matches every device
         */
        struct net_device *dev;
        /* namespace, used to recognise its fib6_null_entry */
        struct net *net;
        /* address compared against each route's fib6_prefsrc.addr */
        struct in6_addr *addr;
};
3840
3841 static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3842 {
3843         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3844         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3845         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3846
3847         if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3848             rt != net->ipv6.fib6_null_entry &&
3849             ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
3850                 spin_lock_bh(&rt6_exception_lock);
3851                 /* remove prefsrc entry */
3852                 rt->fib6_prefsrc.plen = 0;
3853                 /* need to update cache as well */
3854                 rt6_exceptions_remove_prefsrc(rt);
3855                 spin_unlock_bh(&rt6_exception_lock);
3856         }
3857         return 0;
3858 }
3859
3860 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3861 {
3862         struct net *net = dev_net(ifp->idev->dev);
3863         struct arg_dev_net_ip adni = {
3864                 .dev = ifp->idev->dev,
3865                 .net = net,
3866                 .addr = &ifp->addr,
3867         };
3868         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3869 }
3870
/* Flag combination fib6_clean_tohost() requires, all bits set, before it
 * considers a route for removal.
 */
#define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3872
3873 /* Remove routers and update dst entries when gateway turn into host. */
3874 static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3875 {
3876         struct in6_addr *gateway = (struct in6_addr *)arg;
3877
3878         if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3879             ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3880                 return -1;
3881         }
3882
3883         /* Further clean up cached routes in exception table.
3884          * This is needed because cached route may have a different
3885          * gateway than its 'parent' in the case of an ip redirect.
3886          */
3887         rt6_exceptions_clean_tohost(rt, gateway);
3888
3889         return 0;
3890 }
3891
/* Run fib6_clean_tohost() over all routes of @net for @gateway. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
        fib6_clean_all(net, fib6_clean_tohost, gateway);
}
3896
/* Argument bundle for the fib6_ifup() and fib6_ifdown() walkers. */
struct arg_netdev_event {
        /* device the event is about */
        const struct net_device *dev;
        union {
                /* nexthop flags to clear; filled in by rt6_sync_up() */
                unsigned int nh_flags;
                /* netdev event; filled in by rt6_sync_down_dev() */
                unsigned long event;
        };
};
3904
3905 static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3906 {
3907         struct fib6_info *iter;
3908         struct fib6_node *fn;
3909
3910         fn = rcu_dereference_protected(rt->fib6_node,
3911                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3912         iter = rcu_dereference_protected(fn->leaf,
3913                         lockdep_is_held(&rt->fib6_table->tb6_lock));
3914         while (iter) {
3915                 if (iter->fib6_metric == rt->fib6_metric &&
3916                     rt6_qualify_for_ecmp(iter))
3917                         return iter;
3918                 iter = rcu_dereference_protected(iter->fib6_next,
3919                                 lockdep_is_held(&rt->fib6_table->tb6_lock));
3920         }
3921
3922         return NULL;
3923 }
3924
3925 static bool rt6_is_dead(const struct fib6_info *rt)
3926 {
3927         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3928             (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3929              fib6_ignore_linkdown(rt)))
3930                 return true;
3931
3932         return false;
3933 }
3934
3935 static int rt6_multipath_total_weight(const struct fib6_info *rt)
3936 {
3937         struct fib6_info *iter;
3938         int total = 0;
3939
3940         if (!rt6_is_dead(rt))
3941                 total += rt->fib6_nh.nh_weight;
3942
3943         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3944                 if (!rt6_is_dead(iter))
3945                         total += iter->fib6_nh.nh_weight;
3946         }
3947
3948         return total;
3949 }
3950
3951 static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3952 {
3953         int upper_bound = -1;
3954
3955         if (!rt6_is_dead(rt)) {
3956                 *weight += rt->fib6_nh.nh_weight;
3957                 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3958                                                     total) - 1;
3959         }
3960         atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3961 }
3962
3963 static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3964 {
3965         struct fib6_info *iter;
3966         int weight = 0;
3967
3968         rt6_upper_bound_set(rt, &weight, total);
3969
3970         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3971                 rt6_upper_bound_set(iter, &weight, total);
3972 }
3973
3974 void rt6_multipath_rebalance(struct fib6_info *rt)
3975 {
3976         struct fib6_info *first;
3977         int total;
3978
3979         /* In case the entire multipath route was marked for flushing,
3980          * then there is no need to rebalance upon the removal of every
3981          * sibling route.
3982          */
3983         if (!rt->fib6_nsiblings || rt->should_flush)
3984                 return;
3985
3986         /* During lookup routes are evaluated in order, so we need to
3987          * make sure upper bounds are assigned from the first sibling
3988          * onwards.
3989          */
3990         first = rt6_multipath_first_sibling(rt);
3991         if (WARN_ON_ONCE(!first))
3992                 return;
3993
3994         total = rt6_multipath_total_weight(first);
3995         rt6_multipath_upper_bound_set(first, total);
3996 }
3997
3998 static int fib6_ifup(struct fib6_info *rt, void *p_arg)
3999 {
4000         const struct arg_netdev_event *arg = p_arg;
4001         struct net *net = dev_net(arg->dev);
4002
4003         if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
4004                 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
4005                 fib6_update_sernum_upto_root(net, rt);
4006                 rt6_multipath_rebalance(rt);
4007         }
4008
4009         return 0;
4010 }
4011
4012 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
4013 {
4014         struct arg_netdev_event arg = {
4015                 .dev = dev,
4016                 {
4017                         .nh_flags = nh_flags,
4018                 },
4019         };
4020
4021         if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
4022                 arg.nh_flags |= RTNH_F_LINKDOWN;
4023
4024         fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
4025 }
4026
4027 static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
4028                                    const struct net_device *dev)
4029 {
4030         struct fib6_info *iter;
4031
4032         if (rt->fib6_nh.nh_dev == dev)
4033                 return true;
4034         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4035                 if (iter->fib6_nh.nh_dev == dev)
4036                         return true;
4037
4038         return false;
4039 }
4040
4041 static void rt6_multipath_flush(struct fib6_info *rt)
4042 {
4043         struct fib6_info *iter;
4044
4045         rt->should_flush = 1;
4046         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4047                 iter->should_flush = 1;
4048 }
4049
4050 static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
4051                                              const struct net_device *down_dev)
4052 {
4053         struct fib6_info *iter;
4054         unsigned int dead = 0;
4055
4056         if (rt->fib6_nh.nh_dev == down_dev ||
4057             rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4058                 dead++;
4059         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4060                 if (iter->fib6_nh.nh_dev == down_dev ||
4061                     iter->fib6_nh.nh_flags & RTNH_F_DEAD)
4062                         dead++;
4063
4064         return dead;
4065 }
4066
4067 static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
4068                                        const struct net_device *dev,
4069                                        unsigned int nh_flags)
4070 {
4071         struct fib6_info *iter;
4072
4073         if (rt->fib6_nh.nh_dev == dev)
4074                 rt->fib6_nh.nh_flags |= nh_flags;
4075         list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4076                 if (iter->fib6_nh.nh_dev == dev)
4077                         iter->fib6_nh.nh_flags |= nh_flags;
4078 }
4079
4080 /* called with write lock held for table with rt */
4081 static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
4082 {
4083         const struct arg_netdev_event *arg = p_arg;
4084         const struct net_device *dev = arg->dev;
4085         struct net *net = dev_net(dev);
4086
4087         if (rt == net->ipv6.fib6_null_entry)
4088                 return 0;
4089
4090         switch (arg->event) {
4091         case NETDEV_UNREGISTER:
4092                 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4093         case NETDEV_DOWN:
4094                 if (rt->should_flush)
4095                         return -1;
4096                 if (!rt->fib6_nsiblings)
4097                         return rt->fib6_nh.nh_dev == dev ? -1 : 0;
4098                 if (rt6_multipath_uses_dev(rt, dev)) {
4099                         unsigned int count;
4100
4101                         count = rt6_multipath_dead_count(rt, dev);
4102                         if (rt->fib6_nsiblings + 1 == count) {
4103                                 rt6_multipath_flush(rt);
4104                                 return -1;
4105                         }
4106                         rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4107                                                    RTNH_F_LINKDOWN);
4108                         fib6_update_sernum(net, rt);
4109                         rt6_multipath_rebalance(rt);
4110                 }
4111                 return -2;
4112         case NETDEV_CHANGE:
4113                 if (rt->fib6_nh.nh_dev != dev ||
4114                     rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
4115                         break;
4116                 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
4117                 rt6_multipath_rebalance(rt);
4118                 break;
4119         }
4120
4121         return 0;
4122 }
4123
4124 void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
4125 {
4126         struct arg_netdev_event arg = {
4127                 .dev = dev,
4128                 {
4129                         .event = event,
4130                 },
4131         };
4132
4133         fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4134 }
4135
4136 void rt6_disable_ip(struct net_device *dev, unsigned long event)
4137 {
4138         rt6_sync_down_dev(dev, event);
4139         rt6_uncached_list_flush_dev(dev_net(dev), dev);
4140         neigh_ifdown(&nd_tbl, dev);
4141 }
4142
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        /* device whose MTU changed */
        struct net_device *dev;
        /* MTU value to apply to routes using that device */
        unsigned int mtu;
};
4147
4148 static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
4149 {
4150         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4151         struct inet6_dev *idev;
4152
4153         /* In IPv6 pmtu discovery is not optional,
4154            so that RTAX_MTU lock cannot disable it.
4155            We still use this lock to block changes
4156            caused by addrconf/ndisc.
4157         */
4158
4159         idev = __in6_dev_get(arg->dev);
4160         if (!idev)
4161                 return 0;
4162
4163         /* For administrative MTU increase, there is no way to discover
4164            IPv6 PMTU increase, so PMTU increase should be updated here.
4165            Since RFC 1981 doesn't include administrative MTU increase
4166            update PMTU increase is a MUST. (i.e. jumbo frame)
4167          */
4168         if (rt->fib6_nh.nh_dev == arg->dev &&
4169             !fib6_metric_locked(rt, RTAX_MTU)) {
4170                 u32 mtu = rt->fib6_pmtu;
4171
4172                 if (mtu >= arg->mtu ||
4173                     (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4174                         fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4175
4176                 spin_lock_bh(&rt6_exception_lock);
4177                 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4178                 spin_unlock_bh(&rt6_exception_lock);
4179         }
4180         return 0;
4181 }
4182
4183 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
4184 {
4185         struct rt6_mtu_change_arg arg = {
4186                 .dev = dev,
4187                 .mtu = mtu,
4188         };
4189
4190         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
4191 }
4192
/* Attribute policy passed to nlmsg_parse() by rtm_to_fib6_config() when an
 * IPv6 route netlink message is parsed.  Entries give either a .len or an
 * NLA_* type; attributes not listed keep a zero-initialized policy entry.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
        [RTA_PREFSRC]           = { .len = sizeof(struct in6_addr) },
        [RTA_OIF]               = { .type = NLA_U32 },
        [RTA_IIF]               = { .type = NLA_U32 },
        [RTA_PRIORITY]          = { .type = NLA_U32 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
        [RTA_PREF]              = { .type = NLA_U8 },
        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
        [RTA_ENCAP]             = { .type = NLA_NESTED },
        [RTA_EXPIRES]           = { .type = NLA_U32 },
        [RTA_UID]               = { .type = NLA_U32 },
        [RTA_MARK]              = { .type = NLA_U32 },
        [RTA_TABLE]             = { .type = NLA_U32 },
        [RTA_IP_PROTO]          = { .type = NLA_U8 },
        [RTA_SPORT]             = { .type = NLA_U16 },
        [RTA_DPORT]             = { .type = NLA_U16 },
};
4212
4213 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4214                               struct fib6_config *cfg,
4215                               struct netlink_ext_ack *extack)
4216 {
4217         struct rtmsg *rtm;
4218         struct nlattr *tb[RTA_MAX+1];
4219         unsigned int pref;
4220         int err;
4221
4222         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4223                           NULL);
4224         if (err < 0)
4225                 goto errout;
4226
4227         err = -EINVAL;
4228         rtm = nlmsg_data(nlh);
4229         memset(cfg, 0, sizeof(*cfg));
4230
4231         cfg->fc_table = rtm->rtm_table;
4232         cfg->fc_dst_len = rtm->rtm_dst_len;
4233         cfg->fc_src_len = rtm->rtm_src_len;
4234         cfg->fc_flags = RTF_UP;
4235         cfg->fc_protocol = rtm->rtm_protocol;
4236         cfg->fc_type = rtm->rtm_type;
4237
4238         if (rtm->rtm_type == RTN_UNREACHABLE ||
4239             rtm->rtm_type == RTN_BLACKHOLE ||
4240             rtm->rtm_type == RTN_PROHIBIT ||
4241             rtm->rtm_type == RTN_THROW)
4242                 cfg->fc_flags |= RTF_REJECT;
4243
4244         if (rtm->rtm_type == RTN_LOCAL)
4245                 cfg->fc_flags |= RTF_LOCAL;
4246
4247         if (rtm->rtm_flags & RTM_F_CLONED)
4248                 cfg->fc_flags |= RTF_CACHE;
4249
4250         cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4251
4252         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
4253         cfg->fc_nlinfo.nlh = nlh;
4254         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
4255
4256         if (tb[RTA_GATEWAY]) {
4257                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
4258                 cfg->fc_flags |= RTF_GATEWAY;
4259         }
4260         if (tb[RTA_VIA]) {
4261                 NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4262                 goto errout;
4263         }
4264
4265         if (tb[RTA_DST]) {
4266                 int plen = (rtm->rtm_dst_len + 7) >> 3;
4267
4268                 if (nla_len(tb[RTA_DST]) < plen)
4269                         goto errout;
4270
4271                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
4272         }
4273
4274         if (tb[RTA_SRC]) {
4275                 int plen = (rtm->rtm_src_len + 7) >> 3;
4276
4277                 if (nla_len(tb[RTA_SRC]) < plen)
4278                         goto errout;
4279
4280                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
4281         }
4282
4283         if (tb[RTA_PREFSRC])
4284                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4285
4286         if (tb[RTA_OIF])
4287                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4288
4289         if (tb[RTA_PRIORITY])
4290                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4291
4292         if (tb[RTA_METRICS]) {
4293                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4294                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
4295         }
4296
4297         if (tb[RTA_TABLE])
4298                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4299
4300         if (tb[RTA_MULTIPATH]) {
4301                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4302                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
4303
4304                 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4305                                                      cfg->fc_mp_len, extack);
4306                 if (err < 0)
4307                         goto errout;
4308         }
4309
4310         if (tb[RTA_PREF]) {
4311                 pref = nla_get_u8(tb[RTA_PREF]);
4312                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4313                     pref != ICMPV6_ROUTER_PREF_HIGH)
4314                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
4315                 cfg->fc_flags |= RTF_PREF(pref);
4316         }
4317
4318         if (tb[RTA_ENCAP])
4319                 cfg->fc_encap = tb[RTA_ENCAP];
4320
4321         if (tb[RTA_ENCAP_TYPE]) {
4322                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4323
4324                 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
4325                 if (err < 0)
4326                         goto errout;
4327         }
4328
4329         if (tb[RTA_EXPIRES]) {
4330                 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4331
4332                 if (addrconf_finite_timeout(timeout)) {
4333                         cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4334                         cfg->fc_flags |= RTF_EXPIRES;
4335                 }
4336         }
4337
4338         err = 0;
4339 errout:
4340         return err;
4341 }
4342
4343 struct rt6_nh {
4344         struct fib6_info *fib6_info;
4345         struct fib6_config r_cfg;
4346         struct list_head next;
4347 };
4348
4349 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4350 {
4351         struct rt6_nh *nh;
4352
4353         list_for_each_entry(nh, rt6_nh_list, next) {
4354                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
4355                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4356                         nh->r_cfg.fc_ifindex);
4357         }
4358 }
4359
4360 static int ip6_route_info_append(struct net *net,
4361                                  struct list_head *rt6_nh_list,
4362                                  struct fib6_info *rt,
4363                                  struct fib6_config *r_cfg)
4364 {
4365         struct rt6_nh *nh;
4366         int err = -EEXIST;
4367
4368         list_for_each_entry(nh, rt6_nh_list, next) {
4369                 /* check if fib6_info already exists */
4370                 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
4371                         return err;
4372         }
4373
4374         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4375         if (!nh)
4376                 return -ENOMEM;
4377         nh->fib6_info = rt;
4378         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4379         list_add_tail(&nh->next, rt6_nh_list);
4380
4381         return 0;
4382 }
4383
4384 static void ip6_route_mpath_notify(struct fib6_info *rt,
4385                                    struct fib6_info *rt_last,
4386                                    struct nl_info *info,
4387                                    __u16 nlflags)
4388 {
4389         /* if this is an APPEND route, then rt points to the first route
4390          * inserted and rt_last points to last route inserted. Userspace
4391          * wants a consistent dump of the route which starts at the first
4392          * nexthop. Since sibling routes are always added at the end of
4393          * the list, find the first sibling of the last route appended
4394          */
4395         if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4396                 rt = list_first_entry(&rt_last->fib6_siblings,
4397                                       struct fib6_info,
4398                                       fib6_siblings);
4399         }
4400
4401         if (rt)
4402                 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4403 }
4404
4405 static int ip6_route_multipath_add(struct fib6_config *cfg,
4406                                    struct netlink_ext_ack *extack)
4407 {
4408         struct fib6_info *rt_notif = NULL, *rt_last = NULL;
4409         struct nl_info *info = &cfg->fc_nlinfo;
4410         struct fib6_config r_cfg;
4411         struct rtnexthop *rtnh;
4412         struct fib6_info *rt;
4413         struct rt6_nh *err_nh;
4414         struct rt6_nh *nh, *nh_safe;
4415         __u16 nlflags;
4416         int remaining;
4417         int attrlen;
4418         int err = 1;
4419         int nhn = 0;
4420         int replace = (cfg->fc_nlinfo.nlh &&
4421                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4422         LIST_HEAD(rt6_nh_list);
4423
4424         nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4425         if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4426                 nlflags |= NLM_F_APPEND;
4427
4428         remaining = cfg->fc_mp_len;
4429         rtnh = (struct rtnexthop *)cfg->fc_mp;
4430
4431         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4432          * fib6_info structs per nexthop
4433          */
4434         while (rtnh_ok(rtnh, remaining)) {
4435                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4436                 if (rtnh->rtnh_ifindex)
4437                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4438
4439                 attrlen = rtnh_attrlen(rtnh);
4440                 if (attrlen > 0) {
4441                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4442
4443                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4444                         if (nla) {
4445                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
4446                                 r_cfg.fc_flags |= RTF_GATEWAY;
4447                         }
4448                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4449                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4450                         if (nla)
4451                                 r_cfg.fc_encap_type = nla_get_u16(nla);
4452                 }
4453
4454                 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4455                 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
4456                 if (IS_ERR(rt)) {
4457                         err = PTR_ERR(rt);
4458                         rt = NULL;
4459                         goto cleanup;
4460                 }
4461                 if (!rt6_qualify_for_ecmp(rt)) {
4462                         err = -EINVAL;
4463                         NL_SET_ERR_MSG(extack,
4464                                        "Device only routes can not be added for IPv6 using the multipath API.");
4465                         fib6_info_release(rt);
4466                         goto cleanup;
4467                 }
4468
4469                 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4470
4471                 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4472                                             rt, &r_cfg);
4473                 if (err) {
4474                         fib6_info_release(rt);
4475                         goto cleanup;
4476                 }
4477
4478                 rtnh = rtnh_next(rtnh, &remaining);
4479         }
4480
4481         /* for add and replace send one notification with all nexthops.
4482          * Skip the notification in fib6_add_rt2node and send one with
4483          * the full route when done
4484          */
4485         info->skip_notify = 1;
4486
4487         err_nh = NULL;
4488         list_for_each_entry(nh, &rt6_nh_list, next) {
4489                 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4490                 fib6_info_release(nh->fib6_info);
4491
4492                 if (!err) {
4493                         /* save reference to last route successfully inserted */
4494                         rt_last = nh->fib6_info;
4495
4496                         /* save reference to first route for notification */
4497                         if (!rt_notif)
4498                                 rt_notif = nh->fib6_info;
4499                 }
4500
4501                 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4502                 nh->fib6_info = NULL;
4503                 if (err) {
4504                         if (replace && nhn)
4505                                 ip6_print_replace_route_err(&rt6_nh_list);
4506                         err_nh = nh;
4507                         goto add_errout;
4508                 }
4509
4510                 /* Because each route is added like a single route we remove
4511                  * these flags after the first nexthop: if there is a collision,
4512                  * we have already failed to add the first nexthop:
4513                  * fib6_add_rt2node() has rejected it; when replacing, old
4514                  * nexthops have been replaced by first new, the rest should
4515                  * be added to it.
4516                  */
4517                 if (cfg->fc_nlinfo.nlh) {
4518                         cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4519                                                              NLM_F_REPLACE);
4520                         cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
4521                 }
4522                 nhn++;
4523         }
4524
4525         /* success ... tell user about new route */
4526         ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4527         goto cleanup;
4528
4529 add_errout:
4530         /* send notification for routes that were added so that
4531          * the delete notifications sent by ip6_route_del are
4532          * coherent
4533          */
4534         if (rt_notif)
4535                 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4536
4537         /* Delete routes that were already added */
4538         list_for_each_entry(nh, &rt6_nh_list, next) {
4539                 if (err_nh == nh)
4540                         break;
4541                 ip6_route_del(&nh->r_cfg, extack);
4542         }
4543
4544 cleanup:
4545         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4546                 if (nh->fib6_info)
4547                         fib6_info_release(nh->fib6_info);
4548                 list_del(&nh->next);
4549                 kfree(nh);
4550         }
4551
4552         return err;
4553 }
4554
4555 static int ip6_route_multipath_del(struct fib6_config *cfg,
4556                                    struct netlink_ext_ack *extack)
4557 {
4558         struct fib6_config r_cfg;
4559         struct rtnexthop *rtnh;
4560         int remaining;
4561         int attrlen;
4562         int err = 1, last_err = 0;
4563
4564         remaining = cfg->fc_mp_len;
4565         rtnh = (struct rtnexthop *)cfg->fc_mp;
4566
4567         /* Parse a Multipath Entry */
4568         while (rtnh_ok(rtnh, remaining)) {
4569                 memcpy(&r_cfg, cfg, sizeof(*cfg));
4570                 if (rtnh->rtnh_ifindex)
4571                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4572
4573                 attrlen = rtnh_attrlen(rtnh);
4574                 if (attrlen > 0) {
4575                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4576
4577                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4578                         if (nla) {
4579                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4580                                 r_cfg.fc_flags |= RTF_GATEWAY;
4581                         }
4582                 }
4583                 err = ip6_route_del(&r_cfg, extack);
4584                 if (err)
4585                         last_err = err;
4586
4587                 rtnh = rtnh_next(rtnh, &remaining);
4588         }
4589
4590         return last_err;
4591 }
4592
4593 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4594                               struct netlink_ext_ack *extack)
4595 {
4596         struct fib6_config cfg;
4597         int err;
4598
4599         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4600         if (err < 0)
4601                 return err;
4602
4603         if (cfg.fc_mp)
4604                 return ip6_route_multipath_del(&cfg, extack);
4605         else {
4606                 cfg.fc_delete_all_nh = 1;
4607                 return ip6_route_del(&cfg, extack);
4608         }
4609 }
4610
4611 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4612                               struct netlink_ext_ack *extack)
4613 {
4614         struct fib6_config cfg;
4615         int err;
4616
4617         err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
4618         if (err < 0)
4619                 return err;
4620
4621         if (cfg.fc_mp)
4622                 return ip6_route_multipath_add(&cfg, extack);
4623         else
4624                 return ip6_route_add(&cfg, GFP_KERNEL, extack);
4625 }
4626
4627 static size_t rt6_nlmsg_size(struct fib6_info *rt)
4628 {
4629         int nexthop_len = 0;
4630
4631         if (rt->fib6_nsiblings) {
4632                 nexthop_len = nla_total_size(0)  /* RTA_MULTIPATH */
4633                             + NLA_ALIGN(sizeof(struct rtnexthop))
4634                             + nla_total_size(16) /* RTA_GATEWAY */
4635                             + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4636
4637                 nexthop_len *= rt->fib6_nsiblings;
4638         }
4639
4640         return NLMSG_ALIGN(sizeof(struct rtmsg))
4641                + nla_total_size(16) /* RTA_SRC */
4642                + nla_total_size(16) /* RTA_DST */
4643                + nla_total_size(16) /* RTA_GATEWAY */
4644                + nla_total_size(16) /* RTA_PREFSRC */
4645                + nla_total_size(4) /* RTA_TABLE */
4646                + nla_total_size(4) /* RTA_IIF */
4647                + nla_total_size(4) /* RTA_OIF */
4648                + nla_total_size(4) /* RTA_PRIORITY */
4649                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4650                + nla_total_size(sizeof(struct rta_cacheinfo))
4651                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
4652                + nla_total_size(1) /* RTA_PREF */
4653                + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4654                + nexthop_len;
4655 }
4656
4657 static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
4658                             unsigned int *flags, bool skip_oif)
4659 {
4660         if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4661                 *flags |= RTNH_F_DEAD;
4662
4663         if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4664                 *flags |= RTNH_F_LINKDOWN;
4665
4666                 rcu_read_lock();
4667                 if (fib6_ignore_linkdown(rt))
4668                         *flags |= RTNH_F_DEAD;
4669                 rcu_read_unlock();
4670         }
4671
4672         if (rt->fib6_flags & RTF_GATEWAY) {
4673                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4674                         goto nla_put_failure;
4675         }
4676
4677         *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4678         if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
4679                 *flags |= RTNH_F_OFFLOAD;
4680
4681         /* not needed for multipath encoding b/c it has a rtnexthop struct */
4682         if (!skip_oif && rt->fib6_nh.nh_dev &&
4683             nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4684                 goto nla_put_failure;
4685
4686         if (rt->fib6_nh.nh_lwtstate &&
4687             lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4688                 goto nla_put_failure;
4689
4690         return 0;
4691
4692 nla_put_failure:
4693         return -EMSGSIZE;
4694 }
4695
4696 /* add multipath next hop */
4697 static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4698 {
4699         const struct net_device *dev = rt->fib6_nh.nh_dev;
4700         struct rtnexthop *rtnh;
4701         unsigned int flags = 0;
4702
4703         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4704         if (!rtnh)
4705                 goto nla_put_failure;
4706
4707         rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4708         rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4709
4710         if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4711                 goto nla_put_failure;
4712
4713         rtnh->rtnh_flags = flags;
4714
4715         /* length of rtnetlink header + attributes */
4716         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4717
4718         return 0;
4719
4720 nla_put_failure:
4721         return -EMSGSIZE;
4722 }
4723
4724 static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4725                          struct fib6_info *rt, struct dst_entry *dst,
4726                          struct in6_addr *dest, struct in6_addr *src,
4727                          int iif, int type, u32 portid, u32 seq,
4728                          unsigned int flags)
4729 {
4730         struct rt6_info *rt6 = (struct rt6_info *)dst;
4731         struct rt6key *rt6_dst, *rt6_src;
4732         u32 *pmetrics, table, rt6_flags;
4733         struct nlmsghdr *nlh;
4734         struct rtmsg *rtm;
4735         long expires = 0;
4736
4737         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
4738         if (!nlh)
4739                 return -EMSGSIZE;
4740
4741         if (rt6) {
4742                 rt6_dst = &rt6->rt6i_dst;
4743                 rt6_src = &rt6->rt6i_src;
4744                 rt6_flags = rt6->rt6i_flags;
4745         } else {
4746                 rt6_dst = &rt->fib6_dst;
4747                 rt6_src = &rt->fib6_src;
4748                 rt6_flags = rt->fib6_flags;
4749         }
4750
4751         rtm = nlmsg_data(nlh);
4752         rtm->rtm_family = AF_INET6;
4753         rtm->rtm_dst_len = rt6_dst->plen;
4754         rtm->rtm_src_len = rt6_src->plen;
4755         rtm->rtm_tos = 0;
4756         if (rt->fib6_table)
4757                 table = rt->fib6_table->tb6_id;
4758         else
4759                 table = RT6_TABLE_UNSPEC;
4760         rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4761         if (nla_put_u32(skb, RTA_TABLE, table))
4762                 goto nla_put_failure;
4763
4764         rtm->rtm_type = rt->fib6_type;
4765         rtm->rtm_flags = 0;
4766         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4767         rtm->rtm_protocol = rt->fib6_protocol;
4768
4769         if (rt6_flags & RTF_CACHE)
4770                 rtm->rtm_flags |= RTM_F_CLONED;
4771
4772         if (dest) {
4773                 if (nla_put_in6_addr(skb, RTA_DST, dest))
4774                         goto nla_put_failure;
4775                 rtm->rtm_dst_len = 128;
4776         } else if (rtm->rtm_dst_len)
4777                 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4778                         goto nla_put_failure;
4779 #ifdef CONFIG_IPV6_SUBTREES
4780         if (src) {
4781                 if (nla_put_in6_addr(skb, RTA_SRC, src))
4782                         goto nla_put_failure;
4783                 rtm->rtm_src_len = 128;
4784         } else if (rtm->rtm_src_len &&
4785                    nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4786                 goto nla_put_failure;
4787 #endif
4788         if (iif) {
4789 #ifdef CONFIG_IPV6_MROUTE
4790                 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4791                         int err = ip6mr_get_route(net, skb, rtm, portid);
4792
4793                         if (err == 0)
4794                                 return 0;
4795                         if (err < 0)
4796                                 goto nla_put_failure;
4797                 } else
4798 #endif
4799                         if (nla_put_u32(skb, RTA_IIF, iif))
4800                                 goto nla_put_failure;
4801         } else if (dest) {
4802                 struct in6_addr saddr_buf;
4803                 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4804                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4805                         goto nla_put_failure;
4806         }
4807
4808         if (rt->fib6_prefsrc.plen) {
4809                 struct in6_addr saddr_buf;
4810                 saddr_buf = rt->fib6_prefsrc.addr;
4811                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4812                         goto nla_put_failure;
4813         }
4814
4815         pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4816         if (rtnetlink_put_metrics(skb, pmetrics) < 0)
4817                 goto nla_put_failure;
4818
4819         if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4820                 goto nla_put_failure;
4821
4822         /* For multipath routes, walk the siblings list and add
4823          * each as a nexthop within RTA_MULTIPATH.
4824          */
4825         if (rt6) {
4826                 if (rt6_flags & RTF_GATEWAY &&
4827                     nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4828                         goto nla_put_failure;
4829
4830                 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4831                         goto nla_put_failure;
4832         } else if (rt->fib6_nsiblings) {
4833                 struct fib6_info *sibling, *next_sibling;
4834                 struct nlattr *mp;
4835
4836                 mp = nla_nest_start(skb, RTA_MULTIPATH);
4837                 if (!mp)
4838                         goto nla_put_failure;
4839
4840                 if (rt6_add_nexthop(skb, rt) < 0)
4841                         goto nla_put_failure;
4842
4843                 list_for_each_entry_safe(sibling, next_sibling,
4844                                          &rt->fib6_siblings, fib6_siblings) {
4845                         if (rt6_add_nexthop(skb, sibling) < 0)
4846                                 goto nla_put_failure;
4847                 }
4848
4849                 nla_nest_end(skb, mp);
4850         } else {
4851                 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4852                         goto nla_put_failure;
4853         }
4854
4855         if (rt6_flags & RTF_EXPIRES) {
4856                 expires = dst ? dst->expires : rt->expires;
4857                 expires -= jiffies;
4858         }
4859
4860         if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4861                 goto nla_put_failure;
4862
4863         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4864                 goto nla_put_failure;
4865
4866
4867         nlmsg_end(skb, nlh);
4868         return 0;
4869
4870 nla_put_failure:
4871         nlmsg_cancel(skb, nlh);
4872         return -EMSGSIZE;
4873 }
4874
4875 int rt6_dump_route(struct fib6_info *rt, void *p_arg)
4876 {
4877         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
4878         struct net *net = arg->net;
4879
4880         if (rt == net->ipv6.fib6_null_entry)
4881                 return 0;
4882
4883         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4884                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4885
4886                 /* user wants prefix routes only */
4887                 if (rtm->rtm_flags & RTM_F_PREFIX &&
4888                     !(rt->fib6_flags & RTF_PREFIX_RT)) {
4889                         /* success since this is not a prefix route */
4890                         return 1;
4891                 }
4892         }
4893
4894         return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4895                              RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4896                              arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
4897 }
4898
4899 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4900                               struct netlink_ext_ack *extack)
4901 {
4902         struct net *net = sock_net(in_skb->sk);
4903         struct nlattr *tb[RTA_MAX+1];
4904         int err, iif = 0, oif = 0;
4905         struct fib6_info *from;
4906         struct dst_entry *dst;
4907         struct rt6_info *rt;
4908         struct sk_buff *skb;
4909         struct rtmsg *rtm;
4910         struct flowi6 fl6;
4911         bool fibmatch;
4912
4913         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4914                           extack);
4915         if (err < 0)
4916                 goto errout;
4917
4918         err = -EINVAL;
4919         memset(&fl6, 0, sizeof(fl6));
4920         rtm = nlmsg_data(nlh);
4921         fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
4922         fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4923
4924         if (tb[RTA_SRC]) {
4925                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4926                         goto errout;
4927
4928                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4929         }
4930
4931         if (tb[RTA_DST]) {
4932                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4933                         goto errout;
4934
4935                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4936         }
4937
4938         if (tb[RTA_IIF])
4939                 iif = nla_get_u32(tb[RTA_IIF]);
4940
4941         if (tb[RTA_OIF])
4942                 oif = nla_get_u32(tb[RTA_OIF]);
4943
4944         if (tb[RTA_MARK])
4945                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4946
4947         if (tb[RTA_UID])
4948                 fl6.flowi6_uid = make_kuid(current_user_ns(),
4949                                            nla_get_u32(tb[RTA_UID]));
4950         else
4951                 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4952
4953         if (tb[RTA_SPORT])
4954                 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4955
4956         if (tb[RTA_DPORT])
4957                 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4958
4959         if (tb[RTA_IP_PROTO]) {
4960                 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4961                                                   &fl6.flowi6_proto, AF_INET6,
4962                                                   extack);
4963                 if (err)
4964                         goto errout;
4965         }
4966
4967         if (iif) {
4968                 struct net_device *dev;
4969                 int flags = 0;
4970
4971                 rcu_read_lock();
4972
4973                 dev = dev_get_by_index_rcu(net, iif);
4974                 if (!dev) {
4975                         rcu_read_unlock();
4976                         err = -ENODEV;
4977                         goto errout;
4978                 }
4979
4980                 fl6.flowi6_iif = iif;
4981
4982                 if (!ipv6_addr_any(&fl6.saddr))
4983                         flags |= RT6_LOOKUP_F_HAS_SADDR;
4984
4985                 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4986
4987                 rcu_read_unlock();
4988         } else {
4989                 fl6.flowi6_oif = oif;
4990
4991                 dst = ip6_route_output(net, NULL, &fl6);
4992         }
4993
4994
4995         rt = container_of(dst, struct rt6_info, dst);
4996         if (rt->dst.error) {
4997                 err = rt->dst.error;
4998                 ip6_rt_put(rt);
4999                 goto errout;
5000         }
5001
5002         if (rt == net->ipv6.ip6_null_entry) {
5003                 err = rt->dst.error;
5004                 ip6_rt_put(rt);
5005                 goto errout;
5006         }
5007
5008         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
5009         if (!skb) {
5010                 ip6_rt_put(rt);
5011                 err = -ENOBUFS;
5012                 goto errout;
5013         }
5014
5015         skb_dst_set(skb, &rt->dst);
5016
5017         rcu_read_lock();
5018         from = rcu_dereference(rt->from);
5019         if (from) {
5020                 if (fibmatch)
5021                         err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
5022                                             iif, RTM_NEWROUTE,
5023                                             NETLINK_CB(in_skb).portid,
5024                                             nlh->nlmsg_seq, 0);
5025                 else
5026                         err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5027                                             &fl6.saddr, iif, RTM_NEWROUTE,
5028                                             NETLINK_CB(in_skb).portid,
5029                                             nlh->nlmsg_seq, 0);
5030         } else {
5031                 err = -ENETUNREACH;
5032         }
5033         rcu_read_unlock();
5034
5035         if (err < 0) {
5036                 kfree_skb(skb);
5037                 goto errout;
5038         }
5039
5040         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5041 errout:
5042         return err;
5043 }
5044
5045 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
5046                      unsigned int nlm_flags)
5047 {
5048         struct sk_buff *skb;
5049         struct net *net = info->nl_net;
5050         u32 seq;
5051         int err;
5052
5053         err = -ENOBUFS;
5054         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
5055
5056         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
5057         if (!skb)
5058                 goto errout;
5059
5060         err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5061                             event, info->portid, seq, nlm_flags);
5062         if (err < 0) {
5063                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5064                 WARN_ON(err == -EMSGSIZE);
5065                 kfree_skb(skb);
5066                 goto errout;
5067         }
5068         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
5069                     info->nlh, gfp_any());
5070         return;
5071 errout:
5072         if (err < 0)
5073                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
5074 }
5075
5076 static int ip6_route_dev_notify(struct notifier_block *this,
5077                                 unsigned long event, void *ptr)
5078 {
5079         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5080         struct net *net = dev_net(dev);
5081
5082         if (!(dev->flags & IFF_LOOPBACK))
5083                 return NOTIFY_OK;
5084
5085         if (event == NETDEV_REGISTER) {
5086                 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
5087                 net->ipv6.ip6_null_entry->dst.dev = dev;
5088                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5089 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5090                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
5091                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5092                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
5093                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
5094 #endif
5095          } else if (event == NETDEV_UNREGISTER &&
5096                     dev->reg_state != NETREG_UNREGISTERED) {
5097                 /* NETDEV_UNREGISTER could be fired for multiple times by
5098                  * netdev_wait_allrefs(). Make sure we only call this once.
5099                  */
5100                 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5101 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5102                 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5103                 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5104 #endif
5105         }
5106
5107         return NOTIFY_OK;
5108 }
5109
5110 /*
5111  *      /proc
5112  */
5113
5114 #ifdef CONFIG_PROC_FS
5115 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5116 {
5117         struct net *net = (struct net *)seq->private;
5118         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
5119                    net->ipv6.rt6_stats->fib_nodes,
5120                    net->ipv6.rt6_stats->fib_route_nodes,
5121                    atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
5122                    net->ipv6.rt6_stats->fib_rt_entries,
5123                    net->ipv6.rt6_stats->fib_rt_cache,
5124                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
5125                    net->ipv6.rt6_stats->fib_discarded_routes);
5126
5127         return 0;
5128 }
5129 #endif  /* CONFIG_PROC_FS */
5130
5131 #ifdef CONFIG_SYSCTL
5132
5133 static
5134 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
5135                               void __user *buffer, size_t *lenp, loff_t *ppos)
5136 {
5137         struct net *net;
5138         int delay;
5139         if (!write)
5140                 return -EINVAL;
5141
5142         net = (struct net *)ctl->extra1;
5143         delay = net->ipv6.sysctl.flush_delay;
5144         proc_dointvec(ctl, write, buffer, lenp, ppos);
5145         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
5146         return 0;
5147 }
5148
5149 struct ctl_table ipv6_route_table_template[] = {
5150         {
5151                 .procname       =       "flush",
5152                 .data           =       &init_net.ipv6.sysctl.flush_delay,
5153                 .maxlen         =       sizeof(int),
5154                 .mode           =       0200,
5155                 .proc_handler   =       ipv6_sysctl_rtcache_flush
5156         },
5157         {
5158                 .procname       =       "gc_thresh",
5159                 .data           =       &ip6_dst_ops_template.gc_thresh,
5160                 .maxlen         =       sizeof(int),
5161                 .mode           =       0644,
5162                 .proc_handler   =       proc_dointvec,
5163         },
5164         {
5165                 .procname       =       "max_size",
5166                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
5167                 .maxlen         =       sizeof(int),
5168                 .mode           =       0644,
5169                 .proc_handler   =       proc_dointvec,
5170         },
5171         {
5172                 .procname       =       "gc_min_interval",
5173                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5174                 .maxlen         =       sizeof(int),
5175                 .mode           =       0644,
5176                 .proc_handler   =       proc_dointvec_jiffies,
5177         },
5178         {
5179                 .procname       =       "gc_timeout",
5180                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
5181                 .maxlen         =       sizeof(int),
5182                 .mode           =       0644,
5183                 .proc_handler   =       proc_dointvec_jiffies,
5184         },
5185         {
5186                 .procname       =       "gc_interval",
5187                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
5188                 .maxlen         =       sizeof(int),
5189                 .mode           =       0644,
5190                 .proc_handler   =       proc_dointvec_jiffies,
5191         },
5192         {
5193                 .procname       =       "gc_elasticity",
5194                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
5195                 .maxlen         =       sizeof(int),
5196                 .mode           =       0644,
5197                 .proc_handler   =       proc_dointvec,
5198         },
5199         {
5200                 .procname       =       "mtu_expires",
5201                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
5202                 .maxlen         =       sizeof(int),
5203                 .mode           =       0644,
5204                 .proc_handler   =       proc_dointvec_jiffies,
5205         },
5206         {
5207                 .procname       =       "min_adv_mss",
5208                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
5209                 .maxlen         =       sizeof(int),
5210                 .mode           =       0644,
5211                 .proc_handler   =       proc_dointvec,
5212         },
5213         {
5214                 .procname       =       "gc_min_interval_ms",
5215                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
5216                 .maxlen         =       sizeof(int),
5217                 .mode           =       0644,
5218                 .proc_handler   =       proc_dointvec_ms_jiffies,
5219         },
5220         { }
5221 };
5222
5223 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5224 {
5225         struct ctl_table *table;
5226
5227         table = kmemdup(ipv6_route_table_template,
5228                         sizeof(ipv6_route_table_template),
5229                         GFP_KERNEL);
5230
5231         if (table) {
5232                 table[0].data = &net->ipv6.sysctl.flush_delay;
5233                 table[0].extra1 = net;
5234                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5235                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5236                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5237                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5238                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5239                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5240                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5241                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
5242                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5243
5244                 /* Don't export sysctls to unprivileged users */
5245                 if (net->user_ns != &init_user_ns)
5246                         table[0].procname = NULL;
5247         }
5248
5249         return table;
5250 }
5251 #endif
5252
5253 static int __net_init ip6_route_net_init(struct net *net)
5254 {
5255         int ret = -ENOMEM;
5256
5257         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5258                sizeof(net->ipv6.ip6_dst_ops));
5259
5260         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5261                 goto out_ip6_dst_ops;
5262
5263         net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5264                                             sizeof(*net->ipv6.fib6_null_entry),
5265                                             GFP_KERNEL);
5266         if (!net->ipv6.fib6_null_entry)
5267                 goto out_ip6_dst_entries;
5268
5269         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5270                                            sizeof(*net->ipv6.ip6_null_entry),
5271                                            GFP_KERNEL);
5272         if (!net->ipv6.ip6_null_entry)
5273                 goto out_fib6_null_entry;
5274         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5275         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5276                          ip6_template_metrics, true);
5277
5278 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5279         net->ipv6.fib6_has_custom_rules = false;
5280         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5281                                                sizeof(*net->ipv6.ip6_prohibit_entry),
5282                                                GFP_KERNEL);
5283         if (!net->ipv6.ip6_prohibit_entry)
5284                 goto out_ip6_null_entry;
5285         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5286         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5287                          ip6_template_metrics, true);
5288
5289         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5290                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
5291                                                GFP_KERNEL);
5292         if (!net->ipv6.ip6_blk_hole_entry)
5293                 goto out_ip6_prohibit_entry;
5294         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
5295         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5296                          ip6_template_metrics, true);
5297 #endif
5298
5299         net->ipv6.sysctl.flush_delay = 0;
5300         net->ipv6.sysctl.ip6_rt_max_size = 4096;
5301         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5302         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5303         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5304         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5305         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5306         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5307
5308         net->ipv6.ip6_rt_gc_expire = 30*HZ;
5309
5310         ret = 0;
5311 out:
5312         return ret;
5313
5314 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5315 out_ip6_prohibit_entry:
5316         kfree(net->ipv6.ip6_prohibit_entry);
5317 out_ip6_null_entry:
5318         kfree(net->ipv6.ip6_null_entry);
5319 #endif
5320 out_fib6_null_entry:
5321         kfree(net->ipv6.fib6_null_entry);
5322 out_ip6_dst_entries:
5323         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5324 out_ip6_dst_ops:
5325         goto out;
5326 }
5327
5328 static void __net_exit ip6_route_net_exit(struct net *net)
5329 {
5330         kfree(net->ipv6.fib6_null_entry);
5331         kfree(net->ipv6.ip6_null_entry);
5332 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5333         kfree(net->ipv6.ip6_prohibit_entry);
5334         kfree(net->ipv6.ip6_blk_hole_entry);
5335 #endif
5336         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5337 }
5338
5339 static int __net_init ip6_route_net_init_late(struct net *net)
5340 {
5341 #ifdef CONFIG_PROC_FS
5342         proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5343                         sizeof(struct ipv6_route_iter));
5344         proc_create_net_single("rt6_stats", 0444, net->proc_net,
5345                         rt6_stats_seq_show, NULL);
5346 #endif
5347         return 0;
5348 }
5349
5350 static void __net_exit ip6_route_net_exit_late(struct net *net)
5351 {
5352 #ifdef CONFIG_PROC_FS
5353         remove_proc_entry("ipv6_route", net->proc_net);
5354         remove_proc_entry("rt6_stats", net->proc_net);
5355 #endif
5356 }
5357
5358 static struct pernet_operations ip6_route_net_ops = {
5359         .init = ip6_route_net_init,
5360         .exit = ip6_route_net_exit,
5361 };
5362
5363 static int __net_init ipv6_inetpeer_init(struct net *net)
5364 {
5365         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5366
5367         if (!bp)
5368                 return -ENOMEM;
5369         inet_peer_base_init(bp);
5370         net->ipv6.peers = bp;
5371         return 0;
5372 }
5373
5374 static void __net_exit ipv6_inetpeer_exit(struct net *net)
5375 {
5376         struct inet_peer_base *bp = net->ipv6.peers;
5377
5378         net->ipv6.peers = NULL;
5379         inetpeer_invalidate_tree(bp);
5380         kfree(bp);
5381 }
5382
5383 static struct pernet_operations ipv6_inetpeer_ops = {
5384         .init   =       ipv6_inetpeer_init,
5385         .exit   =       ipv6_inetpeer_exit,
5386 };
5387
5388 static struct pernet_operations ip6_route_net_late_ops = {
5389         .init = ip6_route_net_init_late,
5390         .exit = ip6_route_net_exit_late,
5391 };
5392
5393 static struct notifier_block ip6_route_dev_notifier = {
5394         .notifier_call = ip6_route_dev_notify,
5395         .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
5396 };
5397
5398 void __init ip6_route_init_special_entries(void)
5399 {
5400         /* Registering of the loopback is done before this portion of code,
5401          * the loopback reference in rt6_info will not be taken, do it
5402          * manually for init_net */
5403         init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5404         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5405         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5406   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5407         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5408         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5409         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5410         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5411   #endif
5412 }
5413
5414 int __init ip6_route_init(void)
5415 {
5416         int ret;
5417         int cpu;
5418
5419         ret = -ENOMEM;
5420         ip6_dst_ops_template.kmem_cachep =
5421                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5422                                   SLAB_HWCACHE_ALIGN, NULL);
5423         if (!ip6_dst_ops_template.kmem_cachep)
5424                 goto out;
5425
5426         ret = dst_entries_init(&ip6_dst_blackhole_ops);
5427         if (ret)
5428                 goto out_kmem_cache;
5429
5430         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5431         if (ret)
5432                 goto out_dst_entries;
5433
5434         ret = register_pernet_subsys(&ip6_route_net_ops);
5435         if (ret)
5436                 goto out_register_inetpeer;
5437
5438         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5439
5440         ret = fib6_init();
5441         if (ret)
5442                 goto out_register_subsys;
5443
5444         ret = xfrm6_init();
5445         if (ret)
5446                 goto out_fib6_init;
5447
5448         ret = fib6_rules_init();
5449         if (ret)
5450                 goto xfrm6_init;
5451
5452         ret = register_pernet_subsys(&ip6_route_net_late_ops);
5453         if (ret)
5454                 goto fib6_rules_init;
5455
5456         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5457                                    inet6_rtm_newroute, NULL, 0);
5458         if (ret < 0)
5459                 goto out_register_late_subsys;
5460
5461         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5462                                    inet6_rtm_delroute, NULL, 0);
5463         if (ret < 0)
5464                 goto out_register_late_subsys;
5465
5466         ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5467                                    inet6_rtm_getroute, NULL,
5468                                    RTNL_FLAG_DOIT_UNLOCKED);
5469         if (ret < 0)
5470                 goto out_register_late_subsys;
5471
5472         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5473         if (ret)
5474                 goto out_register_late_subsys;
5475
5476         for_each_possible_cpu(cpu) {
5477                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5478
5479                 INIT_LIST_HEAD(&ul->head);
5480                 spin_lock_init(&ul->lock);
5481         }
5482
5483 out:
5484         return ret;
5485
5486 out_register_late_subsys:
5487         rtnl_unregister_all(PF_INET6);
5488         unregister_pernet_subsys(&ip6_route_net_late_ops);
5489 fib6_rules_init:
5490         fib6_rules_cleanup();
5491 xfrm6_init:
5492         xfrm6_fini();
5493 out_fib6_init:
5494         fib6_gc_cleanup();
5495 out_register_subsys:
5496         unregister_pernet_subsys(&ip6_route_net_ops);
5497 out_register_inetpeer:
5498         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5499 out_dst_entries:
5500         dst_entries_destroy(&ip6_dst_blackhole_ops);
5501 out_kmem_cache:
5502         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5503         goto out;
5504 }
5505
5506 void ip6_route_cleanup(void)
5507 {
5508         unregister_netdevice_notifier(&ip6_route_dev_notifier);
5509         unregister_pernet_subsys(&ip6_route_net_late_ops);
5510         fib6_rules_cleanup();
5511         xfrm6_fini();
5512         fib6_gc_cleanup();
5513         unregister_pernet_subsys(&ipv6_inetpeer_ops);
5514         unregister_pernet_subsys(&ip6_route_net_ops);
5515         dst_entries_destroy(&ip6_dst_blackhole_ops);
5516         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5517 }