/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/mroute6.h>
#include <linux/pim.h>
#include <net/addrconf.h>
#include <linux/netfilter_ipv6.h>
#include <linux/export.h>
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
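
/* Per-namespace multicast routing state: one mr6_table per routing
 * table id when CONFIG_IPV6_MROUTE_MULTIPLE_TABLES is set, otherwise a
 * single default table (net->ipv6.mrt6).
 */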
struct mr6_table {
	struct list_head	list;
	possible_net_t		net;
	u32			id;
	struct sock		*mroute6_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc6_unres_queue;
	struct list_head	mfc6_cache_array[MFC6_LINES];
	struct mif_device	vif6_table[MAXMIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	bool			mroute_do_assert;
	bool			mroute_do_pim;
#ifdef CONFIG_IPV6_PIMSM_V2
	int			mroute_reg_vif_num;
#endif
};
#include <linux/nospec.h>
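
/* With CONFIG_IPV6_MROUTE_MULTIPLE_TABLES, table selection is done via
 * fib rules; ip6mr_rule/ip6mr_result carry the rule and the table it
 * resolves to through fib_rules_lookup().
 */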
struct ip6mr_rule {
	struct fib_rule		common;
};

struct ip6mr_result {
	struct mr6_table	*mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */
static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);

static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm);
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
			       struct netlink_callback *cb);
static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;

	ip6mr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	int err;
	struct ip6mr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
			       flowi6_to_flowi(flp6), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
			     int flags, struct fib_lookup_arg *arg)
{
	struct ip6mr_result *res = arg->result;
	struct mr6_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ip6mr_get_table(rule->fr_net, rule->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}

static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}

static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			   struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos     = 0;
	return 0;
}
static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
	.family		= RTNL_FAMILY_IP6MR,
	.rule_size	= sizeof(struct ip6mr_rule),
	.addr_size	= sizeof(struct in6_addr),
	.action		= ip6mr_rule_action,
	.match		= ip6mr_rule_match,
	.configure	= ip6mr_rule_configure,
	.compare	= ip6mr_rule_compare,
	.fill		= ip6mr_rule_fill,
	.nlgroup	= RTNLGRP_IPV6_RULE,
	.policy		= ip6mr_rule_policy,
	.owner		= THIS_MODULE,
};
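
/*
 * Example (sketch, assuming an iproute2 build with mrule support): a
 * daemon can bind its control socket to a non-default table and then
 * steer input traffic to it with a fib rule.  Table id 100 and the
 * interface name are illustrative, not taken from this file.
 *
 *	u32 table = 100;
 *	setsockopt(s, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
 *	// shell: ip -6 mrule add iif eth0 lookup 100
 */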
static int __net_init ip6mr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr6_table *mrt;
	int err;

	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv6.mr6_tables);

	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
	if (!mrt) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
	if (err < 0)
		goto err2;

	net->ipv6.mr6_rules_ops = ops;
	return 0;

err2:
	ip6mr_free_table(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}
static void __net_exit ip6mr_rules_exit(struct net *net)
{
	struct mr6_table *mrt, *next;

	rtnl_lock();
	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
		list_del(&mrt->list);
		ip6mr_free_table(mrt);
	}
	fib_rules_unregister(net->ipv6.mr6_rules_ops);
	rtnl_unlock();
}
#else
#define ip6mr_for_each_table(mrt, net) \
	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)

static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
{
	return net->ipv6.mrt6;
}

static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
			    struct mr6_table **mrt)
{
	*mrt = net->ipv6.mrt6;
	return 0;
}

static int __net_init ip6mr_rules_init(struct net *net)
{
	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
	return net->ipv6.mrt6 ? 0 : -ENOMEM;
}

static void __net_exit ip6mr_rules_exit(struct net *net)
{
	rtnl_lock();
	ip6mr_free_table(net->ipv6.mrt6);
	net->ipv6.mrt6 = NULL;
	rtnl_unlock();
}
#endif
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
{
	struct mr6_table *mrt;
	unsigned int i;

	mrt = ip6mr_get_table(net, id);
	if (mrt)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (!mrt)
		return NULL;
	mrt->id = id;
	write_pnet(&mrt->net, net);

	/* Forwarding cache */
	for (i = 0; i < MFC6_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IPV6_PIMSM_V2
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
	return mrt;
}
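
/* Resolved entries live in MFC6_LINES plain hash chains, keyed by
 * MFC6_HASH(group, origin); unresolved entries go to the single
 * mfc6_unres_queue list guarded by mfc_unres_lock.
 */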
static void ip6mr_free_table(struct mr6_table *mrt)
{
	del_timer_sync(&mrt->ipmr_expire_timer);
	mroute_clean_tables(mrt, true);
	kfree(mrt);
}
#ifdef CONFIG_PROC_FS

struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	struct list_head *cache;
	int ct;
};


static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
					   struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr6_table *mrt = it->mrt;
	struct mfc6_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc6_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}
/*
 *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 */

struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr6_table *mrt;
	int ct;
};

static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
					    struct ipmr_vif_iter *iter,
					    loff_t pos)
{
	struct mr6_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}

static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip6mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!MIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif6_table[iter->ct];
	}
	return NULL;
}

static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr6_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
	} else {
		const struct mif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
			   vif - mrt->vif6_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags);
	}
	return 0;
}
static const struct seq_operations ip6mr_vif_seq_ops = {
	.start = ip6mr_vif_seq_start,
	.next  = ip6mr_vif_seq_next,
	.stop  = ip6mr_vif_seq_stop,
	.show  = ip6mr_vif_seq_show,
};
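
/*
 * Sample /proc/net/ip6_mr_vif output, per the format above (the
 * interface names and counter values are illustrative only):
 *
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags
 *	 0 eth0           15360     120         0       0 00000
 *	 1 pim6reg            0       0     15360     120 00004
 */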
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ip6mr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip6mr_vif_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc6_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr6_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc6_cache, list);

	if (it->cache == &mrt->mfc6_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);

	while (++it->ct < MFC6_LINES) {
		it->cache = &mrt->mfc6_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc6_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc6_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc6_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr6_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc6_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc6_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group                            "
			 "Origin                           "
			 "Iif      Pkts  Bytes     Wrong  Oifs\n");
	} else {
		const struct mfc6_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		struct mr6_table *mrt = it->mrt;

		seq_printf(seq, "%pI6 %pI6 %-3hd",
			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
			   mfc->mf6c_parent);

		if (it->cache != &mrt->mfc6_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (MIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ip6mr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open    = ipmr_mfc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif
#ifdef CONFIG_IPV6_PIMSM_V2

static int pim6_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct ipv6hdr *encap;
	struct net_device *reg_dev = NULL;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int reg_vif_num;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     sizeof(*pim), IPPROTO_PIM,
			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	encap = (struct ipv6hdr *)(skb_transport_header(skb) +
				   sizeof(*pim));

	if (!ipv6_addr_is_multicast(&encap->daddr) ||
	    encap->payload_len == 0 ||
	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
		goto drop;

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		goto drop;
	reg_vif_num = mrt->mroute_reg_vif_num;

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = mrt->vif6_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (!reg_dev)
		goto drop;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IPV6);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
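
/*
 * After skb_tunnel_rx() the decapsulated packet is re-injected with
 * netif_rx() as if it had arrived on the pim6reg device, so normal
 * multicast forwarding picks it up on the register vif.
 */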
static const struct inet6_protocol pim6_protocol = {
	.handler	=	pim6_rcv,
};
/* Service routines creating virtual interfaces: PIMREG */

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
				struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_oif	= dev->ifindex,
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct ipv6hdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT6_TABLE_DFLT)
		sprintf(name, "pim6reg");
	else
		sprintf(name, "pim6reg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif
/*
 *	Delete a VIF entry
 */

static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
		       struct list_head *head)
{
	struct mif_device *v;
	struct net_device *dev;
	struct inet6_dev *in6_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif6_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi + 1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (MIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp + 1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding--;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	if ((v->flags & MIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			kfree_skb(skb);
	}

	ip6mr_cache_free(c);
}
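
/*
 * Note: the version == 0 test above is how queued netlink requests are
 * told apart from real packets; ip6mr_get_route() parks its request
 * skbs with a dummy IPv6 header whose version field is zero.
 */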
/* Timer process for all the unresolved queue. */

static void ipmr_do_expire_process(struct mr6_table *mrt)
{
	unsigned long now = jiffies;
	unsigned long expires = 10 * HZ;
	struct mfc6_cache *c, *next;

	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			/* not yet... */
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mr6_netlink_event(mrt, c, RTM_DELROUTE);
		ip6mr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}

static void ipmr_expire_process(unsigned long arg)
{
	struct mr6_table *mrt = (struct mr6_table *)arg;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
		return;
	}

	if (!list_empty(&mrt->mfc6_unres_queue))
		ipmr_do_expire_process(mrt);

	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
				    unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXMIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (MIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
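
/*
 * Example: with mrt->maxvif == 4 and ttls == {0, 1, 0, 1}, vifs 1 and 3
 * become oifs: res.ttls[] ends up {255, 1, 255, 1, ...}, res.minvif == 1
 * and res.maxvif == 4 (one past the highest forwarding vif).
 */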
static int mif6_add(struct net *net, struct mr6_table *mrt,
		    struct mif6ctl *vifc, int mrtsock)
{
	int vifi = vifc->mif6c_mifi;
	struct mif_device *v = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct inet6_dev *in6_dev;
	int err;

	/* Is vif busy ? */
	if (MIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->mif6c_flags) {
#ifdef CONFIG_IPV6_PIMSM_V2
	case MIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ip6mr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case 0:
		dev = dev_get_by_index(net, vifc->mif6c_pifi);
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in6_dev = __in6_dev_get(dev);
	if (in6_dev) {
		in6_dev->cnf.mc_forwarding++;
		inet6_netconf_notify_devconf(dev_net(dev),
					     NETCONFA_MC_FORWARDING,
					     dev->ifindex, &in6_dev->cnf);
	}

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->flags = vifc->mif6c_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & MIFF_REGISTER)
		v->link = dev_get_iflink(dev);

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (v->flags & MIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi + 1 > mrt->maxvif)
		mrt->maxvif = vifi + 1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
					   const struct in6_addr *origin,
					   const struct in6_addr *mcastgrp)
{
	int line = MFC6_HASH(mcastgrp, origin);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
			return c;
	}
	return NULL;
}

/* Look for a (*,*,oif) entry */
static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
						      mifi_t mifi)
{
	int line = MFC6_HASH(&in6addr_any, &in6addr_any);
	struct mfc6_cache *c;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_any(&c->mf6c_mcastgrp) &&
		    (c->mfc_un.res.ttls[mifi] < 255))
			return c;

	return NULL;
}

/* Look for a (*,G) entry */
static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
					       struct in6_addr *mcastgrp,
					       mifi_t mifi)
{
	int line = MFC6_HASH(mcastgrp, &in6addr_any);
	struct mfc6_cache *c, *proxy;

	if (ipv6_addr_any(mcastgrp))
		goto skip;

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
		if (ipv6_addr_any(&c->mf6c_origin) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
			if (c->mfc_un.res.ttls[mifi] < 255)
				return c;

			/* It's ok if the mifi is part of the static tree */
			proxy = ip6mr_cache_find_any_parent(mrt,
							    c->mf6c_parent);
			if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
				return c;
		}

skip:
	return ip6mr_cache_find_any_parent(mrt, mifi);
}
/*
 *	Allocate a multicast cache entry
 */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
	if (!c)
		return NULL;
	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
	c->mfc_un.res.minvif = MAXMIFS;
	return c;
}

static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
{
	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
	if (!c)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10 * HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
				struct mfc6_cache *uc, struct mfc6_cache *c)
{
	struct sk_buff *skb;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ipv6_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));

			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
			}
			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else
			ip6_mr_forward(net, mrt, skb, c);
	}
}
/*
 *	Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */

static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
			      mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = mrt->mroute_reg_vif_num;
		msg->im6_pad = 0;
		msg->im6_src = ipv6_hdr(pkt)->saddr;
		msg->im6_dst = ipv6_hdr(pkt)->daddr;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	msg->im6_src = ipv6_hdr(pkt)->saddr;
	msg->im6_dst = ipv6_hdr(pkt)->daddr;

	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (!mrt->mroute6_sk) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
	if (ret < 0) {
		net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}
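
/*
 * A sketch of the receiving side, for orientation: pim6sd reads these
 * upcalls from its raw ICMPv6 socket and tells them apart from real
 * packets by im6_mbz, which is always zero here but can never be zero
 * in a genuine IPv6 header at that offset.  Hypothetical handler:
 *
 *	struct mrt6msg *m = (struct mrt6msg *)buf;
 *	if (m->im6_mbz == 0 && m->im6_msgtype == MRT6MSG_NOCACHE)
 *		resolve_and_add_mfc(&m->im6_src, &m->im6_dst, m->im6_mif);
 */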
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 */

static int
ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc6_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ip6mr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mf6c_parent = -1;
		c->mf6c_origin = ipv6_hdr(skb)->saddr;
		c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;

		/*
		 *	Reflect first query at pim6sd
		 */
		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
		if (err < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ip6mr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc6_unres_queue);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);

		ipmr_do_expire_process(mrt);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC6 cache manipulation by user space
 */

static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
			    int parent)
{
	int line;
	struct mfc6_cache *c, *next;

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == c->mf6c_parent)) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ip6mr_device_event(struct notifier_block *this,
			      unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr6_table *mrt;
	struct mif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ip6mr_for_each_table(mrt, net) {
		v = &mrt->vif6_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				mif6_delete(mrt, ct, 1, NULL);
		}
	}

	return NOTIFY_DONE;
}

static struct notifier_block ip6_mr_notifier = {
	.notifier_call = ip6mr_device_event
};
/*
 *	Setup for IP multicast routing
 */

static int __net_init ip6mr_net_init(struct net *net)
{
	int err;

	err = ip6mr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
		goto proc_vif_fail;
	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
		goto proc_cache_fail;
#endif

	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	remove_proc_entry("ip6_mr_vif", net->proc_net);
proc_vif_fail:
	ip6mr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ip6mr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip6_mr_cache", net->proc_net);
	remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
	ip6mr_rules_exit(net);
}

static struct pernet_operations ip6mr_net_ops = {
	.init = ip6mr_net_init,
	.exit = ip6mr_net_exit,
};
int __init ip6_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip6_mrt_cache",
				       sizeof(struct mfc6_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL);
	if (!mrt_cachep)
		return -ENOMEM;

	err = register_pernet_subsys(&ip6mr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip6_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IPV6_PIMSM_V2
	if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
		pr_err("%s: can't add PIM protocol\n", __func__);
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
		      ip6mr_rtm_dumproute, NULL);
	return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ip6mr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}

void ip6_mr_cleanup(void)
{
	rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
#ifdef CONFIG_IPV6_PIMSM_V2
	inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
	unregister_netdevice_notifier(&ip6_mr_notifier);
	unregister_pernet_subsys(&ip6mr_net_ops);
	kmem_cache_destroy(mrt_cachep);
}
static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
			 struct mf6cctl *mfc, int mrtsock, int parent)
{
	bool found = false;
	int line;
	struct mfc6_cache *uc, *c;
	unsigned char ttls[MAXMIFS];
	int i;

	if (mfc->mf6cc_parent >= MAXMIFS)
		return -ENFILE;

	memset(ttls, 255, MAXMIFS);
	for (i = 0; i < MAXMIFS; i++) {
		if (IF_ISSET(i, &mfc->mf6cc_ifset))
			ttls[i] = 1;
	}

	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);

	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
		    ipv6_addr_equal(&c->mf6c_mcastgrp,
				    &mfc->mf6cc_mcastgrp.sin6_addr) &&
		    (parent == -1 || parent == mfc->mf6cc_parent)) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mf6c_parent = mfc->mf6cc_parent;
		ip6mr_update_thresholds(mrt, c, ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		mr6_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
	    !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
		return -EINVAL;

	c = ip6mr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
	c->mf6c_parent = mfc->mf6cc_parent;
	ip6mr_update_thresholds(mrt, c, ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc6_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc6_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ip6mr_cache_resolve(net, mrt, uc, c);
		ip6mr_cache_free(uc);
	}
	mr6_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
	int i;
	LIST_HEAD(list);
	struct mfc6_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
			continue;
		mif6_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC6_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
			if (!all && (c->mfc_flags & MFC_STATIC))
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
			list_del(&c->list);
			mr6_netlink_event(mrt, c, RTM_DELROUTE);
			ip6mr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
{
	int err = 0;
	struct net *net = sock_net(sk);

	rtnl_lock();
	write_lock_bh(&mrt_lock);
	if (likely(mrt->mroute6_sk == NULL)) {
		mrt->mroute6_sk = sk;
		net->ipv6.devconf_all->mc_forwarding++;
	} else {
		err = -EADDRINUSE;
	}
	write_unlock_bh(&mrt_lock);

	if (!err)
		inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
					     NETCONFA_IFINDEX_ALL,
					     net->ipv6.devconf_all);
	rtnl_unlock();

	return err;
}

int ip6mr_sk_done(struct sock *sk)
{
	int err = -EACCES;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	rtnl_lock();
	ip6mr_for_each_table(mrt, net) {
		if (sk == mrt->mroute6_sk) {
			write_lock_bh(&mrt_lock);
			mrt->mroute6_sk = NULL;
			net->ipv6.devconf_all->mc_forwarding--;
			write_unlock_bh(&mrt_lock);
			inet6_netconf_notify_devconf(net,
						     NETCONFA_MC_FORWARDING,
						     NETCONFA_IFINDEX_ALL,
						     net->ipv6.devconf_all);

			mroute_clean_tables(mrt, false);
			err = 0;
			break;
		}
	}
	rtnl_unlock();

	return err;
}

struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
{
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi6_oif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};

	if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
		return NULL;

	return mrt->mroute6_sk;
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret, parent = 0;
	struct mif6ctl vif;
	struct mf6cctl mfc;
	mifi_t mifi;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	if (optname != MRT6_INIT) {
		if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT6_INIT:
		if (optlen < sizeof(int))
			return -EINVAL;

		return ip6mr_sk_init(mrt, sk);

	case MRT6_DONE:
		return ip6mr_sk_done(sk);

	case MRT6_ADD_MIF:
		if (optlen < sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.mif6c_mifi >= MAXMIFS)
			return -ENFILE;
		rtnl_lock();
		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
		rtnl_unlock();
		return ret;

	case MRT6_DEL_MIF:
		if (optlen < sizeof(mifi_t))
			return -EINVAL;
		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
			return -EFAULT;
		rtnl_lock();
		ret = mif6_delete(mrt, mifi, 0, NULL);
		rtnl_unlock();
		return ret;

	/*
	 *	Manipulate the forwarding caches. These live
	 *	in a sort of kernel/user symbiosis.
	 */
	case MRT6_ADD_MFC:
	case MRT6_DEL_MFC:
		parent = -1;
		/* fall through */
	case MRT6_ADD_MFC_PROXY:
	case MRT6_DEL_MFC_PROXY:
		if (optlen < sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		if (parent == 0)
			parent = mfc.mf6cc_parent;
		rtnl_lock();
		if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
			ret = ip6mr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ip6mr_mfc_add(net, mrt, &mfc,
					    sk == mrt->mroute6_sk, parent);
		rtnl_unlock();
		return ret;

	/*
	 *	Control PIM assert (to activate pim will activate assert)
	 */
	case MRT6_ASSERT:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = v;
		return 0;
	}

#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
	{
		int v;

		if (optlen != sizeof(v))
			return -EINVAL;
		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = !!v;
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		return ret;
	}
#endif

#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	case MRT6_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
		if (v != RT_TABLE_DEFAULT && v >= 100000000)
			return -EINVAL;
		if (sk == mrt->mroute6_sk)
			return -EBUSY;

		ret = 0;
		if (!ip6mr_new_table(net, v))
			ret = -ENOMEM;
		raw6_sk(sk)->ip6mr_table = v;
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT6_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
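
/*
 * Example (userspace, sketch): roughly how a pim6sd-style daemon drives
 * the options above.  Error handling is omitted and the interface name
 * is illustrative; the snippet follows the MRT6_* API but is not taken
 * from pim6sd itself.
 *
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	int one = 1;
 *	struct mif6ctl mc = { .mif6c_mifi = 0,
 *			      .mif6c_pifi = if_nametoindex("eth0") };
 *
 *	setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one));
 *	setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mc, sizeof(mc));
 *	// read MRT6MSG_* upcalls, install routes with MRT6_ADD_MFC
 *	setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
 */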
/*
 *	Getsock opt support for the multicast routing system.
 */

int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
			  int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
		return -EOPNOTSUPP;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT6_VERSION:
		val = 0x0305;
		break;
#ifdef CONFIG_IPV6_PIMSM_V2
	case MRT6_PIM:
		val = mrt->mroute_do_pim;
		break;
#endif
	case MRT6_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */

int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req6 sr;
	struct sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
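
/*
 * Example (userspace, sketch): reading per-(S,G) counters via the ioctl
 * above.  The addresses are illustrative; only src/grp are inputs, the
 * counters are filled in by the kernel.
 *
 *	struct sioc_sg_req6 sr;
 *	memset(&sr, 0, sizeof(sr));
 *	inet_pton(AF_INET6, "2001:db8::1", &sr.src.sin6_addr);
 *	inet_pton(AF_INET6, "ff3e::4321", &sr.grp.sin6_addr);
 *	if (ioctl(s, SIOCGETSGCNT_IN6, &sr) == 0)
 *		printf("pkts %lu bytes %lu\n", sr.pktcnt, sr.bytecnt);
 */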
#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_mif_req6 {
	mifi_t mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req6 sr;
	struct compat_sioc_mif_req6 vr;
	struct mif_device *vif;
	struct mfc6_cache *c;
	struct net *net = sock_net(sk);
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.mifi >= mrt->maxvif)
			return -EINVAL;
		vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
		read_lock(&mrt_lock);
		vif = &mrt->vif6_table[vr.mifi];
		if (MIF_EXISTS(mrt, vr.mifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_OUTOCTETS, skb->len);
	return dst_output(net, sk, skb);
}

/*
 *	Processing handlers for ip6mr_forward
 */

static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
	struct ipv6hdr *ipv6h;
	struct mif_device *vif = &mrt->vif6_table[vifi];
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	if (!vif->dev)
		goto out_free;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (vif->flags & MIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
		goto out_free;
	}
#endif

	ipv6h = ipv6_hdr(skb);

	fl6 = (struct flowi6) {
		.flowi6_oif = vif->link,
		.daddr = ipv6h->daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		dst_release(dst);
		goto out_free;
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	dev = vif->dev;
	skb->dev = dev;
	vif->pkt_out++;
	vif->bytes_out += skb->len;

	/* We are about to write */
	/* XXX: extension headers? */
	if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
		goto out_free;

	ipv6h = ipv6_hdr(skb);
	ipv6h->hop_limit--;

	IP6CB(skb)->flags |= IP6SKB_FORWARDED;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6mr_forward2_finish);

out_free:
	kfree_skb(skb);
	return 0;
}
static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
		if (mrt->vif6_table[ct].dev == dev)
			break;
	}
	return ct;
}
static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
			   struct sk_buff *skb, struct mfc6_cache *cache)
{
	int psend = -1;
	int vif, ct;
	int true_vifi = ip6mr_find_vif(mrt, skb->dev);

	vif = cache->mf6c_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
		struct mfc6_cache *cache_proxy;

		/* For an (*,G) entry, we only check that the incoming
		 * interface is part of the static tree.
		 */
		cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
		if (cache_proxy &&
		    cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
			goto forward;
	}

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (mrt->vif6_table[vif].dev != skb->dev) {
		cache->mfc_un.res.wrong_if++;

		if (true_vifi >= 0 && mrt->mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mrt->mroute_do_pim ||
		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    time_after(jiffies,
			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
			cache->mfc_un.res.last_assert = jiffies;
			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
		}
		goto dont_forward;
	}

forward:
	mrt->vif6_table[vif].pkt_in++;
	mrt->vif6_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
	if (ipv6_addr_any(&cache->mf6c_origin) &&
	    ipv6_addr_any(&cache->mf6c_mcastgrp)) {
		if (true_vifi >= 0 &&
		    true_vifi != cache->mf6c_parent &&
		    ipv6_hdr(skb)->hop_limit >
				cache->mfc_un.res.ttls[cache->mf6c_parent]) {
			/* It's an (*,*) entry and the packet is not coming from
			 * the upstream: forward the packet to the upstream
			 * only.
			 */
			psend = cache->mf6c_parent;
			goto last_forward;
		}
		goto dont_forward;
	}
	for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
		/* For (*,G) entry, don't forward to the incoming interface */
		if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
		    ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					ip6mr_forward2(net, mrt, skb2, cache, psend);
			}
			psend = ct;
		}
	}
last_forward:
	if (psend != -1) {
		ip6mr_forward2(net, mrt, skb, cache, psend);
		return;
	}

dont_forward:
	kfree_skb(skb);
}
/*
 *	Multicast packets for forwarding arrive here
 */

int ip6_mr_input(struct sk_buff *skb)
{
	struct mfc6_cache *cache;
	struct net *net = dev_net(skb->dev);
	struct mr6_table *mrt;
	struct flowi6 fl6 = {
		.flowi6_iif	= skb->dev->ifindex,
		.flowi6_mark	= skb->mark,
	};
	int err;

	err = ip6mr_fib_lookup(net, &fl6, &mrt);
	if (err < 0) {
		kfree_skb(skb);
		return err;
	}

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt,
				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
	if (!cache) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt,
						     &ipv6_hdr(skb)->daddr,
						     vif);
	}

	/*
	 *	No usable cache entry
	 */
	if (!cache) {
		int vif;

		vif = ip6mr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ip6mr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip6_mr_forward(net, mrt, skb, cache);

	read_unlock(&mrt_lock);

	return 0;
}
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			       struct mfc6_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct nlattr *mp_attr;
	struct rta_mfc_stats mfcs;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mf6c_parent >= MAXMIFS)
		return -ENOENT;

	if (MIF_EXISTS(mrt, c->mf6c_parent) &&
	    nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
		return -EMSGSIZE;
	mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}

	nla_nest_end(skb, mp_attr);

	mfcs.mfcs_packets = c->mfc_un.res.pkt;
	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
	if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
		    int nowait, u32 portid)
{
	int err;
	struct mr6_table *mrt;
	struct mfc6_cache *cache;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);

	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
	if (!cache && skb->dev) {
		int vif = ip6mr_find_vif(mrt, skb->dev);

		if (vif >= 0)
			cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
						     vif);
	}

	if (!cache) {
		struct sk_buff *skb2;
		struct ipv6hdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}

		/* really correct? */
		skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		NETLINK_CB(skb2).portid = portid;
		skb_reset_transport_header(skb2);

		skb_put(skb2, sizeof(struct ipv6hdr));
		skb_reset_network_header(skb2);

		iph = ipv6_hdr(skb2);
		iph->version = 0;
		iph->priority = 0;
		iph->flow_lbl[0] = 0;
		iph->flow_lbl[1] = 0;
		iph->flow_lbl[2] = 0;
		iph->payload_len = 0;
		iph->nexthdr = IPPROTO_NONE;
		iph->hop_limit = 0;
		iph->saddr = rt->rt6i_src.addr;
		iph->daddr = rt->rt6i_dst.addr;

		err = ip6mr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);

		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;

	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
			     int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;
	int err;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family   = RTNL_FAMILY_IP6MR;
	rtm->rtm_dst_len  = 128;
	rtm->rtm_src_len  = 128;
	rtm->rtm_tos      = 0;
	rtm->rtm_table    = mrt->id;
	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
		goto nla_put_failure;
	rtm->rtm_type = RTN_MULTICAST;
	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
	if (c->mfc_flags & MFC_STATIC)
		rtm->rtm_protocol = RTPROT_STATIC;
	else
		rtm->rtm_protocol = RTPROT_MROUTED;
	rtm->rtm_flags    = 0;

	if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
	    nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
		goto nla_put_failure;
	err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
	/* do not break the dump if cache is unresolved */
	if (err < 0 && err != -ENOENT)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int mr6_msgsize(bool unresolved, int maxvif)
{
	size_t len =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)	/* RTA_TABLE */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_SRC */
		+ nla_total_size(sizeof(struct in6_addr))	/* RTA_DST */
		;

	if (!unresolved)
		len = len
		      + nla_total_size(4)	/* RTA_IIF */
		      + nla_total_size(0)	/* RTA_MULTIPATH */
		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
						/* RTA_MFC_STATS */
		      + nla_total_size(sizeof(struct rta_mfc_stats))
		;

	return len;
}
static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
			      int cmd)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
			GFP_ATOMIC);
	if (!skb)
		goto errout;

	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
	if (err < 0)
		goto errout;

	rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
	return;

errout:
	kfree_skb(skb);
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mr6_table *mrt;
	struct mfc6_cache *mfc;
	unsigned int t = 0, s_t;
	unsigned int h = 0, s_h;
	unsigned int e = 0, s_e;

	s_t = cb->args[0];
	s_h = cb->args[1];
	s_e = cb->args[2];

	read_lock(&mrt_lock);
	ip6mr_for_each_table(mrt, net) {
		if (t < s_t)
			goto next_table;
		if (t > s_t)
			s_h = 0;
		for (h = s_h; h < MFC6_LINES; h++) {
			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
				if (e < s_e)
					goto next_entry;
				if (ip6mr_fill_mroute(mrt, skb,
						      NETLINK_CB(cb->skb).portid,
						      cb->nlh->nlmsg_seq,
						      mfc, RTM_NEWROUTE,
						      NLM_F_MULTI) < 0)
					goto done;
next_entry:
				e++;
			}
			e = s_e = 0;
		}
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
			if (e < s_e)
				goto next_entry2;
			if (ip6mr_fill_mroute(mrt, skb,
					      NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      mfc, RTM_NEWROUTE,
					      NLM_F_MULTI) < 0) {
				spin_unlock_bh(&mfc_unres_lock);
				goto done;
			}
next_entry2:
			e++;
		}
		spin_unlock_bh(&mfc_unres_lock);
		e = s_e = 0;
		s_h = 0;
next_table:
		t++;
	}
done:
	read_unlock(&mrt_lock);

	cb->args[2] = e;
	cb->args[1] = h;
	cb->args[0] = t;

	return skb->len;
}