1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
46 #define neigh_dbg(level, fmt, ...) \
48 if (level <= NEIGH_DEBUG) \
49 pr_debug(fmt, ##__VA_ARGS__); \
52 #define PNEIGH_HASHMASK 0xF
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 struct net_device *dev);
62 static const struct seq_operations neigh_stat_seq_ops;
66 Neighbour hash table buckets are protected with rwlock tbl->lock.
68 - All the scans/updates to hash buckets MUST be made under this lock.
69 - NOTHING clever should be made under this lock: no callbacks
70 to protocol backends, no attempts to send something to network.
71 It will result in deadlocks, if backend/driver wants to use neighbour
73 - If the entry requires some non-trivial actions, increase
74 its reference count and release table lock.
76 Neighbour entries are protected:
77 - with reference count.
78 - with rwlock neigh->lock
80 Reference count prevents destruction.
82 neigh->lock mainly serializes ll address data and its validity state.
83 However, the same lock is used to protect another entry fields:
87 Again, nothing clever shall be made under neigh->lock,
88 the most complicated procedure, which we allow is dev->hard_header.
89 It is supposed, that dev->hard_header is simplistic and does
90 not make callbacks to neighbour tables.
/* Packet sink installed as ->output on dead/stray entries so queued traffic
 * is dropped rather than transmitted.
 * NOTE(review): the extraction lost the body; only the signature survives. */
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Announce removal of a neighbour (trace, RTM_DELNEIGH netlink notify,
 * NETEVENT_NEIGH_UPDATE chain) and drop the caller's reference. */
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
101 trace_neigh_cleanup_and_release(neigh, 0);
102 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
103 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
104 neigh_release(neigh);
108 * It is random distribution in the interval (1/2)*base...(3/2)*base.
109 * It corresponds to default IPv6 settings and is not overridable,
110 * because it is really reasonable choice.
113 unsigned long neigh_rand_reach_time(unsigned long base)
/* base/2 + uniform[0, base) gives the (1/2..3/2)*base range; base == 0 -> 0. */
115 return base ? (prandom_u32() % base) + (base >> 1) : 0;
117 EXPORT_SYMBOL(neigh_rand_reach_time);
/* Unlink the entry from the table's forced-GC list (if queued) and drop the
 * table's gc_entries count.
 * NOTE(review): extraction dropped lines here (e.g. any n->dead assignment). */
119 static void neigh_mark_dead(struct neighbour *n)
122 if (!list_empty(&n->gc_list)) {
123 list_del_init(&n->gc_list);
124 atomic_dec(&n->tbl->gc_entries);
/* Re-evaluate, under tbl->lock then n->lock, whether n belongs on the
 * forced-GC list: PERMANENT or externally-learned entries are exempt and
 * removed; all others are appended at the tail (cleaning pops the front). */
128 static void neigh_update_gc_list(struct neighbour *n)
130 bool on_gc_list, exempt_from_gc;
132 write_lock_bh(&n->tbl->lock);
133 write_lock(&n->lock);
138 /* remove from the gc list if new state is permanent or if neighbor
139 * is externally learned; otherwise entry should be on the gc list
141 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142 n->flags & NTF_EXT_LEARNED;
143 on_gc_list = !list_empty(&n->gc_list);
145 if (exempt_from_gc && on_gc_list) {
146 list_del_init(&n->gc_list);
147 atomic_dec(&n->tbl->gc_entries);
148 } else if (!exempt_from_gc && !on_gc_list) {
149 /* add entries to the tail; cleaning removes from the front */
150 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151 atomic_inc(&n->tbl->gc_entries);
155 write_unlock(&n->lock);
156 write_unlock_bh(&n->tbl->lock);
/* Admin-only: sync the NTF_EXT_LEARNED bit on the entry with the update
 * flags (set when NEIGH_UPDATE_F_EXT_LEARNED is requested, cleared otherwise).
 * NOTE(review): the return/notify lines were lost by the extraction --
 * presumably it reports whether the bit changed; confirm against upstream. */
159 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
165 if (!(flags & NEIGH_UPDATE_F_ADMIN))
168 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
169 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
170 if (ndm_flags & NTF_EXT_LEARNED)
171 neigh->flags |= NTF_EXT_LEARNED;
173 neigh->flags &= ~NTF_EXT_LEARNED;
/* Try to unlink n from the hash chain slot *np; caller holds tbl->lock.
 * Only proceeds when the table holds the sole reference (refcnt == 1);
 * on success the entry is released via neigh_cleanup_and_release(). */
181 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
182 struct neigh_table *tbl)
186 write_lock(&n->lock);
187 if (refcount_read(&n->refcnt) == 1) {
188 struct neighbour *neigh;
190 neigh = rcu_dereference_protected(n->next,
191 lockdep_is_held(&tbl->lock));
192 rcu_assign_pointer(*np, neigh);
196 write_unlock(&n->lock);
198 neigh_cleanup_and_release(n);
/* Find ndel's hash bucket (tbl->lock held by caller) and walk the chain,
 * delegating the actual unlink/free to neigh_del(). */
202 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
204 struct neigh_hash_table *nht;
205 void *pkey = ndel->primary_key;
208 struct neighbour __rcu **np;
210 nht = rcu_dereference_protected(tbl->nht,
211 lockdep_is_held(&tbl->lock));
212 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
213 hash_val = hash_val >> (32 - nht->hash_shift);
215 np = &nht->hash_buckets[hash_val];
216 while ((n = rcu_dereference_protected(*np,
217 lockdep_is_held(&tbl->lock)))) {
219 return neigh_del(n, np, tbl);
/* Synchronous shrink toward gc_thresh2: walk the table's gc_list and remove
 * unreferenced entries that are FAILED, NOARP, multicast-keyed, or not
 * updated within the last 5 seconds (tref). Records last_flush time. */
225 static int neigh_forced_gc(struct neigh_table *tbl)
227 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
228 unsigned long tref = jiffies - 5 * HZ;
229 struct neighbour *n, *tmp;
232 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
234 write_lock_bh(&tbl->lock);
236 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
237 if (refcount_read(&n->refcnt) == 1) {
240 write_lock(&n->lock);
241 if ((n->nud_state == NUD_FAILED) ||
242 (n->nud_state == NUD_NOARP) ||
243 (tbl->is_multicast &&
244 tbl->is_multicast(n->primary_key)) ||
245 time_after(tref, n->updated))
247 write_unlock(&n->lock);
249 if (remove && neigh_remove_one(n, tbl))
251 if (shrunk >= max_clean)
256 tbl->last_flush = jiffies;
258 write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer; mod_timer() returning nonzero means
 * the timer was already pending, which indicates a double-add bug. */
263 static void neigh_add_timer(struct neighbour *n, unsigned long when)
266 if (unlikely(mod_timer(&n->timer, when))) {
267 printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel the state timer if the entry is in a timer-armed NUD state.
 * NOTE(review): the success branch was lost -- presumably it drops the
 * timer's reference and returns 1; confirm against upstream. */
273 static int neigh_del_timer(struct neighbour *n)
275 if ((n->nud_state & NUD_IN_TIMER) &&
276 del_timer(&n->timer)) {
/* Drain every skb queued on a proxy queue; freeing lines were lost here. */
283 static void pneigh_queue_purge(struct sk_buff_head *list)
287 while ((skb = skb_dequeue(list)) != NULL) {
/* Walk every hash bucket (tbl->lock held) and evict entries belonging to
 * dev (or all devices when dev is NULL), optionally sparing NUD_PERMANENT
 * entries. Still-referenced entries are neutered in place: queue purged,
 * output blackholed, state forced to NOARP/NONE until the last ref drops. */
293 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
297 struct neigh_hash_table *nht;
299 nht = rcu_dereference_protected(tbl->nht,
300 lockdep_is_held(&tbl->lock));
302 for (i = 0; i < (1 << nht->hash_shift); i++) {
304 struct neighbour __rcu **np = &nht->hash_buckets[i];
306 while ((n = rcu_dereference_protected(*np,
307 lockdep_is_held(&tbl->lock))) != NULL) {
308 if (dev && n->dev != dev) {
312 if (skip_perm && n->nud_state & NUD_PERMANENT) {
316 rcu_assign_pointer(*np,
317 rcu_dereference_protected(n->next,
318 lockdep_is_held(&tbl->lock)));
319 write_lock(&n->lock);
322 if (refcount_read(&n->refcnt) != 1) {
323 /* The most unpleasant situation.
324 We must destroy neighbour entry,
325 but someone still uses it.
327 The destroy will be delayed until
328 the last user releases us, but
329 we must kill timers etc. and move
332 __skb_queue_purge(&n->arp_queue);
333 n->arp_queue_len_bytes = 0;
334 n->output = neigh_blackhole;
335 if (n->nud_state & NUD_VALID)
336 n->nud_state = NUD_NOARP;
338 n->nud_state = NUD_NONE;
339 neigh_dbg(2, "neigh %p is stray\n", n);
341 write_unlock(&n->lock);
342 neigh_cleanup_and_release(n);
/* Device address changed: flush all of dev's entries (including PERMANENT,
 * skip_perm == false) under the table lock. */
347 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
349 write_lock_bh(&tbl->lock);
350 neigh_flush_dev(tbl, dev, false);
351 write_unlock_bh(&tbl->lock);
353 EXPORT_SYMBOL(neigh_changeaddr);
/* Device going down: flush dev's entries (honouring skip_perm), drop its
 * proxy entries (pneigh_ifdown_and_unlock releases tbl->lock), then stop
 * the proxy timer and purge the proxy queue. */
355 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
358 write_lock_bh(&tbl->lock);
359 neigh_flush_dev(tbl, dev, skip_perm);
360 pneigh_ifdown_and_unlock(tbl, dev);
362 del_timer_sync(&tbl->proxy_timer);
363 pneigh_queue_purge(&tbl->proxy_queue);
/* Carrier loss: flush but keep PERMANENT entries (skip_perm == true). */
367 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
369 __neigh_ifdown(tbl, dev, true);
372 EXPORT_SYMBOL(neigh_carrier_down);
/* Interface down: flush everything, including PERMANENT entries. */
374 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
376 __neigh_ifdown(tbl, dev, false);
379 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and minimally initialize a neighbour entry for (tbl, dev).
 * Enforces gc_thresh2/gc_thresh3 pressure (attempting a forced GC first);
 * on over-threshold failure bumps the table_fulls stat. New entries start
 * in NUD_NONE with a blackhole output. The trailing atomic_dec undoes the
 * speculative gc_entries increment on the failure path. */
381 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
382 struct net_device *dev,
383 u8 flags, bool exempt_from_gc)
385 struct neighbour *n = NULL;
386 unsigned long now = jiffies;
392 entries = atomic_inc_return(&tbl->gc_entries) - 1;
393 if (entries >= tbl->gc_thresh3 ||
394 (entries >= tbl->gc_thresh2 &&
395 time_after(now, tbl->last_flush + 5 * HZ))) {
396 if (!neigh_forced_gc(tbl) &&
397 entries >= tbl->gc_thresh3) {
398 net_info_ratelimited("%s: neighbor table overflow!\n",
400 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
406 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
410 __skb_queue_head_init(&n->arp_queue);
411 rwlock_init(&n->lock);
412 seqlock_init(&n->ha_lock);
413 n->updated = n->used = now;
414 n->nud_state = NUD_NONE;
415 n->output = neigh_blackhole;
417 seqlock_init(&n->hh.hh_lock);
418 n->parms = neigh_parms_clone(&tbl->parms);
419 timer_setup(&n->timer, neigh_timer_handler, 0);
421 NEIGH_CACHE_STAT_INC(tbl, allocs);
423 refcount_set(&n->refcnt, 1);
425 INIT_LIST_HEAD(&n->gc_list);
427 atomic_inc(&tbl->entries);
433 atomic_dec(&tbl->gc_entries);
/* Random hash seed; low bit forced to 1 so the seed is never zero. */
437 static void neigh_get_hash_rnd(u32 *x)
439 *x = get_random_u32() | 1;
/* Allocate a hash table of 2^shift buckets: kzalloc for <= PAGE_SIZE,
 * otherwise whole zeroed pages (registered with kmemleak, since page
 * allocations are not tracked automatically). Seeds all hash_rnd words. */
442 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
444 size_t size = (1 << shift) * sizeof(struct neighbour *);
445 struct neigh_hash_table *ret;
446 struct neighbour __rcu **buckets;
449 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
452 if (size <= PAGE_SIZE) {
453 buckets = kzalloc(size, GFP_ATOMIC);
455 buckets = (struct neighbour __rcu **)
456 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
458 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
464 ret->hash_buckets = buckets;
465 ret->hash_shift = shift;
466 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
467 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback: free the bucket array with the allocator that matches how
 * neigh_hash_alloc() obtained it (kfree vs free_pages + kmemleak_free). */
471 static void neigh_hash_free_rcu(struct rcu_head *head)
473 struct neigh_hash_table *nht = container_of(head,
474 struct neigh_hash_table,
476 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
477 struct neighbour __rcu **buckets = nht->hash_buckets;
479 if (size <= PAGE_SIZE) {
482 kmemleak_free(buckets);
483 free_pages((unsigned long)buckets, get_order(size));
/* Grow the hash table to 2^new_shift buckets under tbl->lock: rehash every
 * entry into the new table (head insertion), publish it with
 * rcu_assign_pointer, and free the old one after a grace period. */
488 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
489 unsigned long new_shift)
491 unsigned int i, hash;
492 struct neigh_hash_table *new_nht, *old_nht;
494 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
496 old_nht = rcu_dereference_protected(tbl->nht,
497 lockdep_is_held(&tbl->lock));
498 new_nht = neigh_hash_alloc(new_shift);
502 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
503 struct neighbour *n, *next;
505 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
506 lockdep_is_held(&tbl->lock));
509 hash = tbl->hash(n->primary_key, n->dev,
512 hash >>= (32 - new_nht->hash_shift);
513 next = rcu_dereference_protected(n->next,
514 lockdep_is_held(&tbl->lock));
516 rcu_assign_pointer(n->next,
517 rcu_dereference_protected(
518 new_nht->hash_buckets[hash],
519 lockdep_is_held(&tbl->lock)));
520 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
524 rcu_assign_pointer(tbl->nht, new_nht);
525 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* RCU lookup of (pkey, dev); takes a reference on the found entry, skipping
 * entries whose refcount already hit zero (refcount_inc_not_zero). */
529 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
530 struct net_device *dev)
534 NEIGH_CACHE_STAT_INC(tbl, lookups);
537 n = __neigh_lookup_noref(tbl, pkey, dev);
539 if (!refcount_inc_not_zero(&n->refcnt))
541 NEIGH_CACHE_STAT_INC(tbl, hits);
544 rcu_read_unlock_bh();
547 EXPORT_SYMBOL(neigh_lookup);
/* Device-agnostic RCU lookup: hash with NULL dev and match on key bytes and
 * network namespace only. Reference taken on success as in neigh_lookup. */
549 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
553 unsigned int key_len = tbl->key_len;
555 struct neigh_hash_table *nht;
557 NEIGH_CACHE_STAT_INC(tbl, lookups);
560 nht = rcu_dereference_bh(tbl->nht);
561 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
563 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
565 n = rcu_dereference_bh(n->next)) {
566 if (!memcmp(n->primary_key, pkey, key_len) &&
567 net_eq(dev_net(n->dev), net)) {
568 if (!refcount_inc_not_zero(&n->refcnt))
570 NEIGH_CACHE_STAT_INC(tbl, hits);
575 rcu_read_unlock_bh();
578 EXPORT_SYMBOL(neigh_lookup_nodev);
/* Core create path: allocate an entry, run protocol (tbl->constructor),
 * driver (ndo_neigh_construct) and parms (neigh_setup) hooks, then insert
 * into the hash table under tbl->lock -- growing the table when entries
 * exceed bucket count, rejecting dead parms, and handling the race where an
 * identical (dev, key) entry was inserted concurrently. Non-exempt entries
 * are appended to the gc_list. confirmed is backdated so the new entry is
 * not treated as freshly reachable. */
580 static struct neighbour *
581 ___neigh_create(struct neigh_table *tbl, const void *pkey,
582 struct net_device *dev, u8 flags,
583 bool exempt_from_gc, bool want_ref)
585 u32 hash_val, key_len = tbl->key_len;
586 struct neighbour *n1, *rc, *n;
587 struct neigh_hash_table *nht;
590 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
591 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
593 rc = ERR_PTR(-ENOBUFS);
597 memcpy(n->primary_key, pkey, key_len);
601 /* Protocol specific setup. */
602 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
604 goto out_neigh_release;
607 if (dev->netdev_ops->ndo_neigh_construct) {
608 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
611 goto out_neigh_release;
615 /* Device specific setup. */
616 if (n->parms->neigh_setup &&
617 (error = n->parms->neigh_setup(n)) < 0) {
619 goto out_neigh_release;
622 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
624 write_lock_bh(&tbl->lock);
625 nht = rcu_dereference_protected(tbl->nht,
626 lockdep_is_held(&tbl->lock));
628 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
629 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
631 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
633 if (n->parms->dead) {
634 rc = ERR_PTR(-EINVAL);
638 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
639 lockdep_is_held(&tbl->lock));
641 n1 = rcu_dereference_protected(n1->next,
642 lockdep_is_held(&tbl->lock))) {
643 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
653 list_add_tail(&n->gc_list, &n->tbl->gc_list);
657 rcu_assign_pointer(n->next,
658 rcu_dereference_protected(nht->hash_buckets[hash_val],
659 lockdep_is_held(&tbl->lock)));
660 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
661 write_unlock_bh(&tbl->lock);
662 neigh_dbg(2, "neigh %p is created\n", n);
667 write_unlock_bh(&tbl->lock);
670 atomic_dec(&tbl->gc_entries);
/* Public create wrapper: no extra flags, subject to GC (exempt_from_gc false). */
675 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
676 struct net_device *dev, bool want_ref)
678 return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
680 EXPORT_SYMBOL(__neigh_create);
/* Hash a proxy key: fold the last 4 key bytes down to PNEIGH_HASHMASK (0..15). */
682 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
684 u32 hash_val = *(u32 *)(pkey + key_len - 4);
685 hash_val ^= (hash_val >> 16);
686 hash_val ^= hash_val >> 8;
687 hash_val ^= hash_val >> 4;
688 hash_val &= PNEIGH_HASHMASK;
/* Scan one proxy chain for (key, net, dev); an entry with a NULL dev acts
 * as a wildcard and matches any device. */
692 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
695 unsigned int key_len,
696 struct net_device *dev)
699 if (!memcmp(n->key, pkey, key_len) &&
700 net_eq(pneigh_net(n), net) &&
701 (n->dev == dev || !n->dev))
/* Lockless proxy lookup helper; caller is responsible for synchronization. */
708 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
709 struct net *net, const void *pkey, struct net_device *dev)
711 unsigned int key_len = tbl->key_len;
712 u32 hash_val = pneigh_hash(pkey, key_len);
714 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
715 net, pkey, key_len, dev);
717 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Look up a proxy entry under the read lock; when not found and 'creat' is
 * set, allocate one (GFP_KERNEL), run tbl->pconstructor, and insert it at
 * the head of its bucket under the write lock. */
719 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
720 struct net *net, const void *pkey,
721 struct net_device *dev, int creat)
723 struct pneigh_entry *n;
724 unsigned int key_len = tbl->key_len;
725 u32 hash_val = pneigh_hash(pkey, key_len);
727 read_lock_bh(&tbl->lock);
728 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
729 net, pkey, key_len, dev);
730 read_unlock_bh(&tbl->lock);
737 n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
741 write_pnet(&n->net, net);
742 memcpy(n->key, pkey, key_len);
747 if (tbl->pconstructor && tbl->pconstructor(n)) {
755 write_lock_bh(&tbl->lock);
756 n->next = tbl->phash_buckets[hash_val];
757 tbl->phash_buckets[hash_val] = n;
758 write_unlock_bh(&tbl->lock);
762 EXPORT_SYMBOL(pneigh_lookup);
/* Remove a proxy entry matching (key, dev, net) from its bucket under the
 * write lock; destructor (if any) runs after the lock is dropped. */
765 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
766 struct net_device *dev)
768 struct pneigh_entry *n, **np;
769 unsigned int key_len = tbl->key_len;
770 u32 hash_val = pneigh_hash(pkey, key_len);
772 write_lock_bh(&tbl->lock);
773 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
775 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
776 net_eq(pneigh_net(n), net)) {
778 write_unlock_bh(&tbl->lock);
779 if (tbl->pdestructor)
787 write_unlock_bh(&tbl->lock);
/* Collect all proxy entries for dev (or every entry when dev is NULL) onto a
 * private freelist while tbl->lock is held, then drop the lock -- as the
 * name promises -- before running destructors and freeing. */
791 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
792 struct net_device *dev)
794 struct pneigh_entry *n, **np, *freelist = NULL;
797 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
798 np = &tbl->phash_buckets[h];
799 while ((n = *np) != NULL) {
800 if (!dev || n->dev == dev) {
809 write_unlock_bh(&tbl->lock);
810 while ((n = freelist)) {
813 if (tbl->pdestructor)
822 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop a reference on a parms block; destroy it when the count hits zero. */
824 static inline void neigh_parms_put(struct neigh_parms *parms)
826 if (refcount_dec_and_test(&parms->refcnt))
827 neigh_parms_destroy(parms);
831 * neighbour must already be out of the table;
834 void neigh_destroy(struct neighbour *neigh)
836 struct net_device *dev = neigh->dev;
838 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
/* Destroying an entry still considered alive indicates a refcount bug. */
841 pr_warn("Destroying alive neighbour %p\n", neigh);
846 if (neigh_del_timer(neigh))
847 pr_warn("Impossible event\n");
/* Purge any packets still parked awaiting resolution. */
849 write_lock_bh(&neigh->lock);
850 __skb_queue_purge(&neigh->arp_queue);
851 write_unlock_bh(&neigh->lock);
852 neigh->arp_queue_len_bytes = 0;
854 if (dev->netdev_ops->ndo_neigh_destroy)
855 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
858 neigh_parms_put(neigh->parms);
860 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
862 atomic_dec(&neigh->tbl->entries);
/* RCU-deferred free: readers may still hold pointers from hash walks. */
863 kfree_rcu(neigh, rcu);
865 EXPORT_SYMBOL(neigh_destroy);
867 /* Neighbour state is suspicious;
870 Called with write_locked neigh.
/* Route output through ops->output (the slow, resolving path). */
872 static void neigh_suspect(struct neighbour *neigh)
874 neigh_dbg(2, "neigh %p is suspected\n", neigh);
876 neigh->output = neigh->ops->output;
879 /* Neighbour state is OK;
882 Called with write_locked neigh.
/* Route output through ops->connected_output (the fast path). */
884 static void neigh_connect(struct neighbour *neigh)
886 neigh_dbg(2, "neigh %p is connected\n", neigh);
888 neigh->output = neigh->ops->connected_output;
/* Periodic GC worker: every ~300s re-randomize each parms' reachable_time;
 * skip all work while below gc_thresh1; otherwise sweep the buckets and free
 * unreferenced entries that are FAILED or idle past GC_STALETIME, sparing
 * PERMANENT, in-timer, and externally-learned entries. The table lock is
 * dropped between buckets to bound latency, then the work is re-queued at
 * BASE_REACHABLE_TIME/2. */
891 static void neigh_periodic_work(struct work_struct *work)
893 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
895 struct neighbour __rcu **np;
897 struct neigh_hash_table *nht;
899 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
901 write_lock_bh(&tbl->lock);
902 nht = rcu_dereference_protected(tbl->nht,
903 lockdep_is_held(&tbl->lock));
906 * periodically recompute ReachableTime from random function
909 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
910 struct neigh_parms *p;
911 tbl->last_rand = jiffies;
912 list_for_each_entry(p, &tbl->parms_list, list)
914 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
917 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
920 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
921 np = &nht->hash_buckets[i];
923 while ((n = rcu_dereference_protected(*np,
924 lockdep_is_held(&tbl->lock))) != NULL) {
927 write_lock(&n->lock);
929 state = n->nud_state;
930 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
931 (n->flags & NTF_EXT_LEARNED)) {
932 write_unlock(&n->lock);
936 if (time_before(n->used, n->confirmed))
937 n->used = n->confirmed;
939 if (refcount_read(&n->refcnt) == 1 &&
940 (state == NUD_FAILED ||
941 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
944 write_unlock(&n->lock);
945 neigh_cleanup_and_release(n);
948 write_unlock(&n->lock);
954 * It's fine to release lock here, even if hash table
955 * grows while we are preempted.
957 write_unlock_bh(&tbl->lock);
959 write_lock_bh(&tbl->lock);
960 nht = rcu_dereference_protected(tbl->nht,
961 lockdep_is_held(&tbl->lock));
964 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
965 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
966 * BASE_REACHABLE_TIME.
968 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
969 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
970 write_unlock_bh(&tbl->lock);
/* Total probe budget: unicast + app probes, plus mcast reprobes when already
 * in NUD_PROBE, otherwise the initial mcast probes. */
973 static __inline__ int neigh_max_probes(struct neighbour *n)
975 struct neigh_parms *p = n->parms;
976 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
977 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
978 NEIGH_VAR(p, MCAST_PROBES));
/* Resolution failed: bump res_failed, report each queued skb unreachable
 * (dropping/re-taking neigh->lock around the callback, re-checking state
 * each iteration to avoid looping if the callback touches this entry),
 * then purge whatever remains on the queue. */
981 static void neigh_invalidate(struct neighbour *neigh)
982 __releases(neigh->lock)
983 __acquires(neigh->lock)
987 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
988 neigh_dbg(2, "neigh %p is failed\n", neigh);
989 neigh->updated = jiffies;
991 /* It is very thin place. report_unreachable is very complicated
992 routine. Particularly, it can hit the same neighbour entry!
994 So that, we try to be accurate and avoid dead loop. --ANK
996 while (neigh->nud_state == NUD_FAILED &&
997 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
998 write_unlock(&neigh->lock);
999 neigh->ops->error_report(neigh, skb);
1000 write_lock(&neigh->lock);
1002 __skb_queue_purge(&neigh->arp_queue);
1003 neigh->arp_queue_len_bytes = 0;
/* Send one solicitation, using a clone of the newest queued skb so the
 * original survives queue overflow; releases neigh->lock before soliciting. */
1006 static void neigh_probe(struct neighbour *neigh)
1007 __releases(neigh->lock)
1009 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1010 /* keep skb alive even if arp_queue overflows */
1012 skb = skb_clone(skb, GFP_ATOMIC);
1013 write_unlock(&neigh->lock);
1014 if (neigh->ops->solicit)
1015 neigh->ops->solicit(neigh, skb);
1016 atomic_inc(&neigh->probes);
1020 /* Called when a timer expires for a neighbour entry. */
/* NUD state machine tick: REACHABLE may decay to DELAY or STALE; DELAY
 * either returns to REACHABLE (recent confirmation) or escalates to PROBE;
 * PROBE/INCOMPLETE retransmit until neigh_max_probes() is exhausted, then
 * fail via neigh_invalidate(). Re-arms the timer (clamped to >= HZ/2 out)
 * while still in a timer state, probes if needed, then notifies/releases. */
1022 static void neigh_timer_handler(struct timer_list *t)
1024 unsigned long now, next;
1025 struct neighbour *neigh = from_timer(neigh, t, timer);
1029 write_lock(&neigh->lock);
1031 state = neigh->nud_state;
1035 if (!(state & NUD_IN_TIMER))
1038 if (state & NUD_REACHABLE) {
1039 if (time_before_eq(now,
1040 neigh->confirmed + neigh->parms->reachable_time)) {
1041 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1042 next = neigh->confirmed + neigh->parms->reachable_time;
1043 } else if (time_before_eq(now,
1045 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1046 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1047 neigh->nud_state = NUD_DELAY;
1048 neigh->updated = jiffies;
1049 neigh_suspect(neigh);
1050 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1052 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1053 neigh->nud_state = NUD_STALE;
1054 neigh->updated = jiffies;
1055 neigh_suspect(neigh);
1058 } else if (state & NUD_DELAY) {
1059 if (time_before_eq(now,
1061 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1062 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1063 neigh->nud_state = NUD_REACHABLE;
1064 neigh->updated = jiffies;
1065 neigh_connect(neigh);
1067 next = neigh->confirmed + neigh->parms->reachable_time;
1069 neigh_dbg(2, "neigh %p is probed\n", neigh);
1070 neigh->nud_state = NUD_PROBE;
1071 neigh->updated = jiffies;
1072 atomic_set(&neigh->probes, 0);
1074 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1077 /* NUD_PROBE|NUD_INCOMPLETE */
1078 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1081 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1082 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1083 neigh->nud_state = NUD_FAILED;
1085 neigh_invalidate(neigh);
1089 if (neigh->nud_state & NUD_IN_TIMER) {
1090 if (time_before(next, jiffies + HZ/2))
1091 next = jiffies + HZ/2;
1092 if (!mod_timer(&neigh->timer, next))
1095 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1099 write_unlock(&neigh->lock);
1103 neigh_update_notify(neigh, 0);
1105 trace_neigh_timer_handler(neigh, 0);
1107 neigh_release(neigh);
/* Kick resolution for an entry before transmit. Already CONNECTED/DELAY/
 * PROBE states need nothing. From NONE/FAILED: start INCOMPLETE probing if
 * any probes are configured, else go straight to FAILED. From STALE: move
 * to DELAY and arm the delay-probe timer. While INCOMPLETE, park the skb on
 * arp_queue, evicting oldest packets past QUEUE_LEN_BYTES (counted as
 * unres_discards). An immediate probe fires after dropping the lock. */
1110 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1113 bool immediate_probe = false;
1115 write_lock_bh(&neigh->lock);
1118 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1123 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1124 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1125 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1126 unsigned long next, now = jiffies;
1128 atomic_set(&neigh->probes,
1129 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1130 neigh_del_timer(neigh);
1131 neigh->nud_state = NUD_INCOMPLETE;
1132 neigh->updated = now;
1133 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1135 neigh_add_timer(neigh, next);
1136 immediate_probe = true;
1138 neigh->nud_state = NUD_FAILED;
1139 neigh->updated = jiffies;
1140 write_unlock_bh(&neigh->lock);
1145 } else if (neigh->nud_state & NUD_STALE) {
1146 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1147 neigh_del_timer(neigh);
1148 neigh->nud_state = NUD_DELAY;
1149 neigh->updated = jiffies;
1150 neigh_add_timer(neigh, jiffies +
1151 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1154 if (neigh->nud_state == NUD_INCOMPLETE) {
1156 while (neigh->arp_queue_len_bytes + skb->truesize >
1157 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1158 struct sk_buff *buff;
1160 buff = __skb_dequeue(&neigh->arp_queue);
1163 neigh->arp_queue_len_bytes -= buff->truesize;
1165 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1168 __skb_queue_tail(&neigh->arp_queue, skb);
1169 neigh->arp_queue_len_bytes += skb->truesize;
1174 if (immediate_probe)
1177 write_unlock(&neigh->lock);
1179 trace_neigh_event_send_done(neigh, rc);
1183 if (neigh->nud_state & NUD_STALE)
1185 write_unlock_bh(&neigh->lock);
1187 trace_neigh_event_send_dead(neigh, 1);
1190 EXPORT_SYMBOL(__neigh_event_send);
/* Link-layer address changed: refresh the cached hardware header via the
 * device's header_ops->cache_update under the hh seqlock. Only headers that
 * were actually populated (hh_len != 0) are rewritten. */
1192 static void neigh_update_hhs(struct neighbour *neigh)
1194 struct hh_cache *hh;
1195 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1198 if (neigh->dev->header_ops)
1199 update = neigh->dev->header_ops->cache_update;
1203 if (READ_ONCE(hh->hh_len)) {
1204 write_seqlock_bh(&hh->hh_lock);
1205 update(hh, neigh->dev, neigh->ha);
1206 write_sequnlock_bh(&hh->hh_lock);
1213 /* Generic update routine.
1214 -- lladdr is new lladdr or NULL, if it is not supplied.
1215 -- new is new state.
1217 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1219 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1220 lladdr instead of overriding it
1222 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1223 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1224 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1226 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1229 Caller MUST hold reference count on the entry.
/* Implementation outline (entry lock held throughout except while draining
 * arp_queue): reject updates to dead entries; non-admin updates may not
 * touch NOARP/PERMANENT entries; !NUD_VALID targets tear down timers and
 * possibly invalidate; otherwise reconcile the proposed lladdr against the
 * cached one per the OVERRIDE/WEAK_OVERRIDE rules, restart the timer for
 * the new state, update the hh caches on an address change, and flush the
 * pending arp_queue through the (re-looked-up) top-level neigh. GC-list
 * membership is refreshed when PERMANENT or ext-learned status flipped. */
1232 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1233 u8 new, u32 flags, u32 nlmsg_pid,
1234 struct netlink_ext_ack *extack)
1236 bool ext_learn_change = false;
1240 struct net_device *dev;
1241 int update_isrouter = 0;
1243 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1245 write_lock_bh(&neigh->lock);
1248 old = neigh->nud_state;
1252 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1256 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1257 (old & (NUD_NOARP | NUD_PERMANENT)))
1260 ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
1261 if (flags & NEIGH_UPDATE_F_USE) {
1262 new = old & ~NUD_PERMANENT;
1263 neigh->nud_state = new;
1268 if (!(new & NUD_VALID)) {
1269 neigh_del_timer(neigh);
1270 if (old & NUD_CONNECTED)
1271 neigh_suspect(neigh);
1272 neigh->nud_state = new;
1274 notify = old & NUD_VALID;
1275 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1276 (new & NUD_FAILED)) {
1277 neigh_invalidate(neigh);
1283 /* Compare new lladdr with cached one */
1284 if (!dev->addr_len) {
1285 /* First case: device needs no address. */
1287 } else if (lladdr) {
1288 /* The second case: if something is already cached
1289 and a new address is proposed:
1291 - if they are different, check override flag
1293 if ((old & NUD_VALID) &&
1294 !memcmp(lladdr, neigh->ha, dev->addr_len))
1297 /* No address is supplied; if we know something,
1298 use it, otherwise discard the request.
1301 if (!(old & NUD_VALID)) {
1302 NL_SET_ERR_MSG(extack, "No link layer address given");
1308 /* Update confirmed timestamp for neighbour entry after we
1309 * received ARP packet even if it doesn't change IP to MAC binding.
1311 if (new & NUD_CONNECTED)
1312 neigh->confirmed = jiffies;
1314 /* If entry was valid and address is not changed,
1315 do not change entry state, if new one is STALE.
1318 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1319 if (old & NUD_VALID) {
1320 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1321 update_isrouter = 0;
1322 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1323 (old & NUD_CONNECTED)) {
1329 if (lladdr == neigh->ha && new == NUD_STALE &&
1330 !(flags & NEIGH_UPDATE_F_ADMIN))
1335 /* Update timestamp only once we know we will make a change to the
1336 * neighbour entry. Otherwise we risk to move the locktime window with
1337 * noop updates and ignore relevant ARP updates.
1339 if (new != old || lladdr != neigh->ha)
1340 neigh->updated = jiffies;
1343 neigh_del_timer(neigh);
1344 if (new & NUD_PROBE)
1345 atomic_set(&neigh->probes, 0);
1346 if (new & NUD_IN_TIMER)
1347 neigh_add_timer(neigh, (jiffies +
1348 ((new & NUD_REACHABLE) ?
1349 neigh->parms->reachable_time :
1351 neigh->nud_state = new;
1355 if (lladdr != neigh->ha) {
1356 write_seqlock(&neigh->ha_lock);
1357 memcpy(&neigh->ha, lladdr, dev->addr_len);
1358 write_sequnlock(&neigh->ha_lock);
1359 neigh_update_hhs(neigh);
1360 if (!(new & NUD_CONNECTED))
1361 neigh->confirmed = jiffies -
1362 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1367 if (new & NUD_CONNECTED)
1368 neigh_connect(neigh);
1370 neigh_suspect(neigh);
1371 if (!(old & NUD_VALID)) {
1372 struct sk_buff *skb;
1374 /* Again: avoid dead loop if something went wrong */
1376 while (neigh->nud_state & NUD_VALID &&
1377 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1378 struct dst_entry *dst = skb_dst(skb);
1379 struct neighbour *n2, *n1 = neigh;
1380 write_unlock_bh(&neigh->lock);
1384 /* Why not just use 'neigh' as-is? The problem is that
1385 * things such as shaper, eql, and sch_teql can end up
1386 * using alternative, different, neigh objects to output
1387 * the packet in the output path. So what we need to do
1388 * here is re-lookup the top-level neigh in the path so
1389 * we can reinject the packet there.
1392 if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1393 n2 = dst_neigh_lookup_skb(dst, skb);
1397 n1->output(n1, skb);
1402 write_lock_bh(&neigh->lock);
1404 __skb_queue_purge(&neigh->arp_queue);
1405 neigh->arp_queue_len_bytes = 0;
1408 if (update_isrouter)
1409 neigh_update_is_router(neigh, flags, &notify);
1410 write_unlock_bh(&neigh->lock);
1412 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1413 neigh_update_gc_list(neigh);
1416 neigh_update_notify(neigh, nlmsg_pid);
1418 trace_neigh_update_done(neigh, err);
/* Public wrapper around __neigh_update with no extended netlink ack. */
1423 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1424 u32 flags, u32 nlmsg_pid)
1426 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1428 EXPORT_SYMBOL(neigh_update);
1430 /* Update the neigh to listen temporarily for probe responses, even if it is
1431 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
/* Only FAILED entries are changed: move to INCOMPLETE with the probe
 * counter pre-exhausted so exactly one retransmit window remains. */
1433 void __neigh_set_probe_once(struct neighbour *neigh)
1437 neigh->updated = jiffies;
1438 if (!(neigh->nud_state & NUD_FAILED))
1440 neigh->nud_state = NUD_INCOMPLETE;
1441 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1442 neigh_add_timer(neigh,
1443 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1445 EXPORT_SYMBOL(__neigh_set_probe_once);
/* Handle a received neighbour solicitation: look up (creating only when a
 * lladdr is present or the device is addressless) and mark STALE with the
 * supplied link-layer address, overriding any cached one. */
1447 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1448 u8 *lladdr, void *saddr,
1449 struct net_device *dev)
1451 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1452 lladdr || !dev->addr_len);
1454 neigh_update(neigh, lladdr, NUD_STALE,
1455 NEIGH_UPDATE_F_OVERRIDE, 0);
1458 EXPORT_SYMBOL(neigh_event_ns);
1460 /* called with read_lock_bh(&n->lock); */
1461 static void neigh_hh_init(struct neighbour *n)
1463 struct net_device *dev = n->dev;
1464 __be16 prot = n->tbl->protocol;
1465 struct hh_cache *hh = &n->hh;
1467 write_lock_bh(&n->lock);
1469 /* Only one thread can come in here and initialize the
1473 dev->header_ops->cache(n, hh, prot);
1475 write_unlock_bh(&n->lock);
1478 /* Slow and careful. */
1480 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1484 if (!neigh_event_send(neigh, skb)) {
1486 struct net_device *dev = neigh->dev;
1489 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1490 neigh_hh_init(neigh);
1493 __skb_pull(skb, skb_network_offset(skb));
1494 seq = read_seqbegin(&neigh->ha_lock);
1495 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1496 neigh->ha, NULL, skb->len);
1497 } while (read_seqretry(&neigh->ha_lock, seq));
1500 rc = dev_queue_xmit(skb);
1511 EXPORT_SYMBOL(neigh_resolve_output);
1513 /* As fast as possible without hh cache */
1515 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1517 struct net_device *dev = neigh->dev;
1522 __skb_pull(skb, skb_network_offset(skb));
1523 seq = read_seqbegin(&neigh->ha_lock);
1524 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1525 neigh->ha, NULL, skb->len);
1526 } while (read_seqretry(&neigh->ha_lock, seq));
1529 err = dev_queue_xmit(skb);
1536 EXPORT_SYMBOL(neigh_connected_output);
/* Output path for devices that need no link-layer header at all. */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1544 static void neigh_proxy_process(struct timer_list *t)
1546 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1547 long sched_next = 0;
1548 unsigned long now = jiffies;
1549 struct sk_buff *skb, *n;
1551 spin_lock(&tbl->proxy_queue.lock);
1553 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1554 long tdif = NEIGH_CB(skb)->sched_next - now;
1557 struct net_device *dev = skb->dev;
1559 __skb_unlink(skb, &tbl->proxy_queue);
1560 if (tbl->proxy_redo && netif_running(dev)) {
1562 tbl->proxy_redo(skb);
1569 } else if (!sched_next || tdif < sched_next)
1572 del_timer(&tbl->proxy_timer);
1574 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1575 spin_unlock(&tbl->proxy_queue.lock);
1578 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1579 struct sk_buff *skb)
1581 unsigned long now = jiffies;
1583 unsigned long sched_next = now + (prandom_u32() %
1584 NEIGH_VAR(p, PROXY_DELAY));
1586 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1591 NEIGH_CB(skb)->sched_next = sched_next;
1592 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1594 spin_lock(&tbl->proxy_queue.lock);
1595 if (del_timer(&tbl->proxy_timer)) {
1596 if (time_before(tbl->proxy_timer.expires, sched_next))
1597 sched_next = tbl->proxy_timer.expires;
1601 __skb_queue_tail(&tbl->proxy_queue, skb);
1602 mod_timer(&tbl->proxy_timer, sched_next);
1603 spin_unlock(&tbl->proxy_queue.lock);
1605 EXPORT_SYMBOL(pneigh_enqueue);
1607 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1608 struct net *net, int ifindex)
1610 struct neigh_parms *p;
1612 list_for_each_entry(p, &tbl->parms_list, list) {
1613 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1614 (!p->dev && !ifindex && net_eq(net, &init_net)))
1621 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1622 struct neigh_table *tbl)
1624 struct neigh_parms *p;
1625 struct net *net = dev_net(dev);
1626 const struct net_device_ops *ops = dev->netdev_ops;
1628 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1631 refcount_set(&p->refcnt, 1);
1633 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1636 write_pnet(&p->net, net);
1637 p->sysctl_table = NULL;
1639 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1645 write_lock_bh(&tbl->lock);
1646 list_add(&p->list, &tbl->parms.list);
1647 write_unlock_bh(&tbl->lock);
1649 neigh_parms_data_state_cleanall(p);
1653 EXPORT_SYMBOL(neigh_parms_alloc);
1655 static void neigh_rcu_free_parms(struct rcu_head *head)
1657 struct neigh_parms *parms =
1658 container_of(head, struct neigh_parms, rcu_head);
1660 neigh_parms_put(parms);
1663 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1665 if (!parms || parms == &tbl->parms)
1667 write_lock_bh(&tbl->lock);
1668 list_del(&parms->list);
1670 write_unlock_bh(&tbl->lock);
1672 dev_put(parms->dev);
1673 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1675 EXPORT_SYMBOL(neigh_parms_release);
1677 static void neigh_parms_destroy(struct neigh_parms *parms)
1682 static struct lock_class_key neigh_table_proxy_queue_class;
1684 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1686 void neigh_table_init(int index, struct neigh_table *tbl)
1688 unsigned long now = jiffies;
1689 unsigned long phsize;
1691 INIT_LIST_HEAD(&tbl->parms_list);
1692 INIT_LIST_HEAD(&tbl->gc_list);
1693 list_add(&tbl->parms.list, &tbl->parms_list);
1694 write_pnet(&tbl->parms.net, &init_net);
1695 refcount_set(&tbl->parms.refcnt, 1);
1696 tbl->parms.reachable_time =
1697 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1699 tbl->stats = alloc_percpu(struct neigh_statistics);
1701 panic("cannot create neighbour cache statistics");
1703 #ifdef CONFIG_PROC_FS
1704 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1705 &neigh_stat_seq_ops, tbl))
1706 panic("cannot create neighbour proc dir entry");
1709 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1711 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1712 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1714 if (!tbl->nht || !tbl->phash_buckets)
1715 panic("cannot allocate neighbour cache hashes");
1717 if (!tbl->entry_size)
1718 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1719 tbl->key_len, NEIGH_PRIV_ALIGN);
1721 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1723 rwlock_init(&tbl->lock);
1724 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1725 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1726 tbl->parms.reachable_time);
1727 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1728 skb_queue_head_init_class(&tbl->proxy_queue,
1729 &neigh_table_proxy_queue_class);
1731 tbl->last_flush = now;
1732 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1734 neigh_tables[index] = tbl;
1736 EXPORT_SYMBOL(neigh_table_init);
1738 int neigh_table_clear(int index, struct neigh_table *tbl)
1740 neigh_tables[index] = NULL;
1741 /* It is not clean... Fix it to unload IPv6 module safely */
1742 cancel_delayed_work_sync(&tbl->gc_work);
1743 del_timer_sync(&tbl->proxy_timer);
1744 pneigh_queue_purge(&tbl->proxy_queue);
1745 neigh_ifdown(tbl, NULL);
1746 if (atomic_read(&tbl->entries))
1747 pr_crit("neighbour leakage\n");
1749 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1750 neigh_hash_free_rcu);
1753 kfree(tbl->phash_buckets);
1754 tbl->phash_buckets = NULL;
1756 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1758 free_percpu(tbl->stats);
1763 EXPORT_SYMBOL(neigh_table_clear);
1765 static struct neigh_table *neigh_find_table(int family)
1767 struct neigh_table *tbl = NULL;
1771 tbl = neigh_tables[NEIGH_ARP_TABLE];
1774 tbl = neigh_tables[NEIGH_ND_TABLE];
1777 tbl = neigh_tables[NEIGH_DN_TABLE];
1784 const struct nla_policy nda_policy[NDA_MAX+1] = {
1785 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1786 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1787 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1788 [NDA_PROBES] = { .type = NLA_U32 },
1789 [NDA_VLAN] = { .type = NLA_U16 },
1790 [NDA_PORT] = { .type = NLA_U16 },
1791 [NDA_VNI] = { .type = NLA_U32 },
1792 [NDA_IFINDEX] = { .type = NLA_U32 },
1793 [NDA_MASTER] = { .type = NLA_U32 },
1794 [NDA_PROTOCOL] = { .type = NLA_U8 },
1797 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1798 struct netlink_ext_ack *extack)
1800 struct net *net = sock_net(skb->sk);
1802 struct nlattr *dst_attr;
1803 struct neigh_table *tbl;
1804 struct neighbour *neigh;
1805 struct net_device *dev = NULL;
1809 if (nlmsg_len(nlh) < sizeof(*ndm))
1812 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1814 NL_SET_ERR_MSG(extack, "Network address not specified");
1818 ndm = nlmsg_data(nlh);
1819 if (ndm->ndm_ifindex) {
1820 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1827 tbl = neigh_find_table(ndm->ndm_family);
1829 return -EAFNOSUPPORT;
1831 if (nla_len(dst_attr) < (int)tbl->key_len) {
1832 NL_SET_ERR_MSG(extack, "Invalid network address");
1836 if (ndm->ndm_flags & NTF_PROXY) {
1837 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1844 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1845 if (neigh == NULL) {
1850 err = __neigh_update(neigh, NULL, NUD_FAILED,
1851 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1852 NETLINK_CB(skb).portid, extack);
1853 write_lock_bh(&tbl->lock);
1854 neigh_release(neigh);
1855 neigh_remove_one(neigh, tbl);
1856 write_unlock_bh(&tbl->lock);
1862 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1863 struct netlink_ext_ack *extack)
1865 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1866 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1867 struct net *net = sock_net(skb->sk);
1869 struct nlattr *tb[NDA_MAX+1];
1870 struct neigh_table *tbl;
1871 struct net_device *dev = NULL;
1872 struct neighbour *neigh;
1878 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1879 nda_policy, extack);
1885 NL_SET_ERR_MSG(extack, "Network address not specified");
1889 ndm = nlmsg_data(nlh);
1890 if (ndm->ndm_ifindex) {
1891 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1897 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1898 NL_SET_ERR_MSG(extack, "Invalid link address");
1903 tbl = neigh_find_table(ndm->ndm_family);
1905 return -EAFNOSUPPORT;
1907 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1908 NL_SET_ERR_MSG(extack, "Invalid network address");
1912 dst = nla_data(tb[NDA_DST]);
1913 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1915 if (tb[NDA_PROTOCOL])
1916 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
1918 if (ndm->ndm_flags & NTF_PROXY) {
1919 struct pneigh_entry *pn;
1922 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1924 pn->flags = ndm->ndm_flags;
1926 pn->protocol = protocol;
1933 NL_SET_ERR_MSG(extack, "Device not specified");
1937 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
1942 neigh = neigh_lookup(tbl, dst, dev);
1943 if (neigh == NULL) {
1944 bool exempt_from_gc;
1946 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1951 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1952 ndm->ndm_flags & NTF_EXT_LEARNED;
1953 neigh = ___neigh_create(tbl, dst, dev,
1954 ndm->ndm_flags & NTF_EXT_LEARNED,
1955 exempt_from_gc, true);
1956 if (IS_ERR(neigh)) {
1957 err = PTR_ERR(neigh);
1961 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1963 neigh_release(neigh);
1967 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1968 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1969 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1973 neigh->protocol = protocol;
1974 if (ndm->ndm_flags & NTF_EXT_LEARNED)
1975 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1976 if (ndm->ndm_flags & NTF_ROUTER)
1977 flags |= NEIGH_UPDATE_F_ISROUTER;
1978 if (ndm->ndm_flags & NTF_USE)
1979 flags |= NEIGH_UPDATE_F_USE;
1981 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1982 NETLINK_CB(skb).portid, extack);
1983 if (!err && ndm->ndm_flags & NTF_USE) {
1984 neigh_event_send(neigh, NULL);
1987 neigh_release(neigh);
1992 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1994 struct nlattr *nest;
1996 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2001 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2002 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2003 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2004 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2005 /* approximative value for deprecated QUEUE_LEN (in packets) */
2006 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2007 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2008 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2009 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2010 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2011 NEIGH_VAR(parms, UCAST_PROBES)) ||
2012 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2013 NEIGH_VAR(parms, MCAST_PROBES)) ||
2014 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2015 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2016 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2018 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2019 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2020 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2021 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2022 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2023 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2024 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2025 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2026 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2027 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2028 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2029 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2030 nla_put_msecs(skb, NDTPA_LOCKTIME,
2031 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
2032 goto nla_put_failure;
2033 return nla_nest_end(skb, nest);
2036 nla_nest_cancel(skb, nest);
2040 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2041 u32 pid, u32 seq, int type, int flags)
2043 struct nlmsghdr *nlh;
2044 struct ndtmsg *ndtmsg;
2046 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2050 ndtmsg = nlmsg_data(nlh);
2052 read_lock_bh(&tbl->lock);
2053 ndtmsg->ndtm_family = tbl->family;
2054 ndtmsg->ndtm_pad1 = 0;
2055 ndtmsg->ndtm_pad2 = 0;
2057 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2058 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2059 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2060 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2061 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2062 goto nla_put_failure;
2064 unsigned long now = jiffies;
2065 long flush_delta = now - tbl->last_flush;
2066 long rand_delta = now - tbl->last_rand;
2067 struct neigh_hash_table *nht;
2068 struct ndt_config ndc = {
2069 .ndtc_key_len = tbl->key_len,
2070 .ndtc_entry_size = tbl->entry_size,
2071 .ndtc_entries = atomic_read(&tbl->entries),
2072 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2073 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2074 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
2078 nht = rcu_dereference_bh(tbl->nht);
2079 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2080 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2081 rcu_read_unlock_bh();
2083 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2084 goto nla_put_failure;
2089 struct ndt_stats ndst;
2091 memset(&ndst, 0, sizeof(ndst));
2093 for_each_possible_cpu(cpu) {
2094 struct neigh_statistics *st;
2096 st = per_cpu_ptr(tbl->stats, cpu);
2097 ndst.ndts_allocs += st->allocs;
2098 ndst.ndts_destroys += st->destroys;
2099 ndst.ndts_hash_grows += st->hash_grows;
2100 ndst.ndts_res_failed += st->res_failed;
2101 ndst.ndts_lookups += st->lookups;
2102 ndst.ndts_hits += st->hits;
2103 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2104 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2105 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2106 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2107 ndst.ndts_table_fulls += st->table_fulls;
2110 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2112 goto nla_put_failure;
2115 BUG_ON(tbl->parms.dev);
2116 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2117 goto nla_put_failure;
2119 read_unlock_bh(&tbl->lock);
2120 nlmsg_end(skb, nlh);
2124 read_unlock_bh(&tbl->lock);
2125 nlmsg_cancel(skb, nlh);
2129 static int neightbl_fill_param_info(struct sk_buff *skb,
2130 struct neigh_table *tbl,
2131 struct neigh_parms *parms,
2132 u32 pid, u32 seq, int type,
2135 struct ndtmsg *ndtmsg;
2136 struct nlmsghdr *nlh;
2138 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2142 ndtmsg = nlmsg_data(nlh);
2144 read_lock_bh(&tbl->lock);
2145 ndtmsg->ndtm_family = tbl->family;
2146 ndtmsg->ndtm_pad1 = 0;
2147 ndtmsg->ndtm_pad2 = 0;
2149 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2150 neightbl_fill_parms(skb, parms) < 0)
2153 read_unlock_bh(&tbl->lock);
2154 nlmsg_end(skb, nlh);
2157 read_unlock_bh(&tbl->lock);
2158 nlmsg_cancel(skb, nlh);
2162 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2163 [NDTA_NAME] = { .type = NLA_STRING },
2164 [NDTA_THRESH1] = { .type = NLA_U32 },
2165 [NDTA_THRESH2] = { .type = NLA_U32 },
2166 [NDTA_THRESH3] = { .type = NLA_U32 },
2167 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2168 [NDTA_PARMS] = { .type = NLA_NESTED },
2171 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2172 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2173 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2174 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2175 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2176 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2177 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2178 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2179 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2180 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2181 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2182 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2183 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2184 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2185 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
2188 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2189 struct netlink_ext_ack *extack)
2191 struct net *net = sock_net(skb->sk);
2192 struct neigh_table *tbl;
2193 struct ndtmsg *ndtmsg;
2194 struct nlattr *tb[NDTA_MAX+1];
2198 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2199 nl_neightbl_policy, extack);
2203 if (tb[NDTA_NAME] == NULL) {
2208 ndtmsg = nlmsg_data(nlh);
2210 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2211 tbl = neigh_tables[tidx];
2214 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2216 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2226 * We acquire tbl->lock to be nice to the periodic timers and
2227 * make sure they always see a consistent set of values.
2229 write_lock_bh(&tbl->lock);
2231 if (tb[NDTA_PARMS]) {
2232 struct nlattr *tbp[NDTPA_MAX+1];
2233 struct neigh_parms *p;
2236 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2238 nl_ntbl_parm_policy, extack);
2240 goto errout_tbl_lock;
2242 if (tbp[NDTPA_IFINDEX])
2243 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2245 p = lookup_neigh_parms(tbl, net, ifindex);
2248 goto errout_tbl_lock;
2251 for (i = 1; i <= NDTPA_MAX; i++) {
2256 case NDTPA_QUEUE_LEN:
2257 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2258 nla_get_u32(tbp[i]) *
2259 SKB_TRUESIZE(ETH_FRAME_LEN));
2261 case NDTPA_QUEUE_LENBYTES:
2262 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2263 nla_get_u32(tbp[i]));
2265 case NDTPA_PROXY_QLEN:
2266 NEIGH_VAR_SET(p, PROXY_QLEN,
2267 nla_get_u32(tbp[i]));
2269 case NDTPA_APP_PROBES:
2270 NEIGH_VAR_SET(p, APP_PROBES,
2271 nla_get_u32(tbp[i]));
2273 case NDTPA_UCAST_PROBES:
2274 NEIGH_VAR_SET(p, UCAST_PROBES,
2275 nla_get_u32(tbp[i]));
2277 case NDTPA_MCAST_PROBES:
2278 NEIGH_VAR_SET(p, MCAST_PROBES,
2279 nla_get_u32(tbp[i]));
2281 case NDTPA_MCAST_REPROBES:
2282 NEIGH_VAR_SET(p, MCAST_REPROBES,
2283 nla_get_u32(tbp[i]));
2285 case NDTPA_BASE_REACHABLE_TIME:
2286 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2287 nla_get_msecs(tbp[i]));
2288 /* update reachable_time as well, otherwise, the change will
2289 * only be effective after the next time neigh_periodic_work
2290 * decides to recompute it (can be multiple minutes)
2293 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2295 case NDTPA_GC_STALETIME:
2296 NEIGH_VAR_SET(p, GC_STALETIME,
2297 nla_get_msecs(tbp[i]));
2299 case NDTPA_DELAY_PROBE_TIME:
2300 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2301 nla_get_msecs(tbp[i]));
2302 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2304 case NDTPA_RETRANS_TIME:
2305 NEIGH_VAR_SET(p, RETRANS_TIME,
2306 nla_get_msecs(tbp[i]));
2308 case NDTPA_ANYCAST_DELAY:
2309 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2310 nla_get_msecs(tbp[i]));
2312 case NDTPA_PROXY_DELAY:
2313 NEIGH_VAR_SET(p, PROXY_DELAY,
2314 nla_get_msecs(tbp[i]));
2316 case NDTPA_LOCKTIME:
2317 NEIGH_VAR_SET(p, LOCKTIME,
2318 nla_get_msecs(tbp[i]));
2325 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2326 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2327 !net_eq(net, &init_net))
2328 goto errout_tbl_lock;
2330 if (tb[NDTA_THRESH1])
2331 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2333 if (tb[NDTA_THRESH2])
2334 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2336 if (tb[NDTA_THRESH3])
2337 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2339 if (tb[NDTA_GC_INTERVAL])
2340 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2345 write_unlock_bh(&tbl->lock);
2350 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2351 struct netlink_ext_ack *extack)
2353 struct ndtmsg *ndtm;
2355 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2356 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2360 ndtm = nlmsg_data(nlh);
2361 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2362 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2366 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2367 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2374 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2376 const struct nlmsghdr *nlh = cb->nlh;
2377 struct net *net = sock_net(skb->sk);
2378 int family, tidx, nidx = 0;
2379 int tbl_skip = cb->args[0];
2380 int neigh_skip = cb->args[1];
2381 struct neigh_table *tbl;
2383 if (cb->strict_check) {
2384 int err = neightbl_valid_dump_info(nlh, cb->extack);
2390 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2392 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2393 struct neigh_parms *p;
2395 tbl = neigh_tables[tidx];
2399 if (tidx < tbl_skip || (family && tbl->family != family))
2402 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2403 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2408 p = list_next_entry(&tbl->parms, list);
2409 list_for_each_entry_from(p, &tbl->parms_list, list) {
2410 if (!net_eq(neigh_parms_net(p), net))
2413 if (nidx < neigh_skip)
2416 if (neightbl_fill_param_info(skb, tbl, p,
2417 NETLINK_CB(cb->skb).portid,
2435 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2436 u32 pid, u32 seq, int type, unsigned int flags)
2438 unsigned long now = jiffies;
2439 struct nda_cacheinfo ci;
2440 struct nlmsghdr *nlh;
2443 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2447 ndm = nlmsg_data(nlh);
2448 ndm->ndm_family = neigh->ops->family;
2451 ndm->ndm_flags = neigh->flags;
2452 ndm->ndm_type = neigh->type;
2453 ndm->ndm_ifindex = neigh->dev->ifindex;
2455 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2456 goto nla_put_failure;
2458 read_lock_bh(&neigh->lock);
2459 ndm->ndm_state = neigh->nud_state;
2460 if (neigh->nud_state & NUD_VALID) {
2461 char haddr[MAX_ADDR_LEN];
2463 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2464 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2465 read_unlock_bh(&neigh->lock);
2466 goto nla_put_failure;
2470 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2471 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2472 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
2473 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2474 read_unlock_bh(&neigh->lock);
2476 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2477 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2478 goto nla_put_failure;
2480 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2481 goto nla_put_failure;
2483 nlmsg_end(skb, nlh);
2487 nlmsg_cancel(skb, nlh);
2491 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2492 u32 pid, u32 seq, int type, unsigned int flags,
2493 struct neigh_table *tbl)
2495 struct nlmsghdr *nlh;
2498 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2502 ndm = nlmsg_data(nlh);
2503 ndm->ndm_family = tbl->family;
2506 ndm->ndm_flags = pn->flags | NTF_PROXY;
2507 ndm->ndm_type = RTN_UNICAST;
2508 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2509 ndm->ndm_state = NUD_NONE;
2511 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2512 goto nla_put_failure;
2514 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2515 goto nla_put_failure;
2517 nlmsg_end(skb, nlh);
2521 nlmsg_cancel(skb, nlh);
2525 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2527 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2528 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2531 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2533 struct net_device *master;
2538 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2539 if (!master || master->ifindex != master_idx)
2545 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2547 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2553 struct neigh_dump_filter {
2558 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2559 struct netlink_callback *cb,
2560 struct neigh_dump_filter *filter)
2562 struct net *net = sock_net(skb->sk);
2563 struct neighbour *n;
2564 int rc, h, s_h = cb->args[1];
2565 int idx, s_idx = idx = cb->args[2];
2566 struct neigh_hash_table *nht;
2567 unsigned int flags = NLM_F_MULTI;
2569 if (filter->dev_idx || filter->master_idx)
2570 flags |= NLM_F_DUMP_FILTERED;
2573 nht = rcu_dereference_bh(tbl->nht);
2575 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2578 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2580 n = rcu_dereference_bh(n->next)) {
2581 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2583 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2584 neigh_master_filtered(n->dev, filter->master_idx))
2586 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2599 rcu_read_unlock_bh();
2605 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2606 struct netlink_callback *cb,
2607 struct neigh_dump_filter *filter)
2609 struct pneigh_entry *n;
2610 struct net *net = sock_net(skb->sk);
2611 int rc, h, s_h = cb->args[3];
2612 int idx, s_idx = idx = cb->args[4];
2613 unsigned int flags = NLM_F_MULTI;
2615 if (filter->dev_idx || filter->master_idx)
2616 flags |= NLM_F_DUMP_FILTERED;
2618 read_lock_bh(&tbl->lock);
2620 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2623 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2624 if (idx < s_idx || pneigh_net(n) != net)
2626 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2627 neigh_master_filtered(n->dev, filter->master_idx))
2629 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2631 RTM_NEWNEIGH, flags, tbl) < 0) {
2632 read_unlock_bh(&tbl->lock);
2641 read_unlock_bh(&tbl->lock);
2650 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2652 struct neigh_dump_filter *filter,
2653 struct netlink_ext_ack *extack)
2655 struct nlattr *tb[NDA_MAX + 1];
2661 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2662 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2666 ndm = nlmsg_data(nlh);
2667 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2668 ndm->ndm_state || ndm->ndm_type) {
2669 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2673 if (ndm->ndm_flags & ~NTF_PROXY) {
2674 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2678 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2679 tb, NDA_MAX, nda_policy,
2682 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2683 NDA_MAX, nda_policy, extack);
2688 for (i = 0; i <= NDA_MAX; ++i) {
2692 /* all new attributes should require strict_check */
2695 filter->dev_idx = nla_get_u32(tb[i]);
2698 filter->master_idx = nla_get_u32(tb[i]);
2702 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2711 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2713 const struct nlmsghdr *nlh = cb->nlh;
2714 struct neigh_dump_filter filter = {};
2715 struct neigh_table *tbl;
2720 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2722 /* check for full ndmsg structure presence, family member is
2723 * the same for both structures
2725 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2726 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2729 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2730 if (err < 0 && cb->strict_check)
2735 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2736 tbl = neigh_tables[t];
2740 if (t < s_t || (family && tbl->family != family))
2743 memset(&cb->args[1], 0, sizeof(cb->args) -
2744 sizeof(cb->args[0]));
2746 err = pneigh_dump_table(tbl, skb, cb, &filter);
2748 err = neigh_dump_table(tbl, skb, cb, &filter);
2757 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2758 struct neigh_table **tbl,
2759 void **dst, int *dev_idx, u8 *ndm_flags,
2760 struct netlink_ext_ack *extack)
2762 struct nlattr *tb[NDA_MAX + 1];
2766 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2767 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2771 ndm = nlmsg_data(nlh);
2772 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2774 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2778 if (ndm->ndm_flags & ~NTF_PROXY) {
2779 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2783 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2784 NDA_MAX, nda_policy, extack);
2788 *ndm_flags = ndm->ndm_flags;
2789 *dev_idx = ndm->ndm_ifindex;
2790 *tbl = neigh_find_table(ndm->ndm_family);
2792 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2793 return -EAFNOSUPPORT;
2796 for (i = 0; i <= NDA_MAX; ++i) {
2802 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2803 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2806 *dst = nla_data(tb[i]);
2809 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2817 static inline size_t neigh_nlmsg_size(void)
2819 return NLMSG_ALIGN(sizeof(struct ndmsg))
2820 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2821 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2822 + nla_total_size(sizeof(struct nda_cacheinfo))
2823 + nla_total_size(4) /* NDA_PROBES */
2824 + nla_total_size(1); /* NDA_PROTOCOL */
2827 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2830 struct sk_buff *skb;
2833 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2837 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2843 err = rtnl_unicast(skb, net, pid);
2848 static inline size_t pneigh_nlmsg_size(void)
2850 return NLMSG_ALIGN(sizeof(struct ndmsg))
2851 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2852 + nla_total_size(1); /* NDA_PROTOCOL */
2855 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2856 u32 pid, u32 seq, struct neigh_table *tbl)
2858 struct sk_buff *skb;
2861 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2865 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2871 err = rtnl_unicast(skb, net, pid);
/* doit handler for RTM_GETNEIGH (registered in neigh_init()).
 * Validates the request via neigh_valid_get_req(), resolves the
 * target device by ifindex, then looks the address up either in the
 * proxy table (NTF_PROXY set) or the regular neighbour table and
 * replies with a single RTM_NEWNEIGH message.
 */
2876 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2877 struct netlink_ext_ack *extack)
2879 struct net *net = sock_net(in_skb->sk);
2880 struct net_device *dev = NULL;
2881 struct neigh_table *tbl = NULL;
2882 struct neighbour *neigh;
2888 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2894 dev = __dev_get_by_index(net, dev_idx);
2896 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2902 NL_SET_ERR_MSG(extack, "Network address not specified");
/* Proxy entries live in tbl->phash_buckets, not the main hash. */
2906 if (ndm_flags & NTF_PROXY) {
2907 struct pneigh_entry *pn;
2909 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2911 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2914 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2915 nlh->nlmsg_seq, tbl);
2919 NL_SET_ERR_MSG(extack, "No device specified");
/* neigh_lookup() takes a reference; dropped below after the reply. */
2923 neigh = neigh_lookup(tbl, dst, dev);
2925 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2929 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2932 neigh_release(neigh);
/* Invoke @cb(neighbour, @cookie) for every entry in @tbl.  Walks the
 * RCU-protected hash under rcu_read_lock_bh plus the table read lock
 * (the read lock keeps the hash from being resized mid-walk).
 * Callbacks therefore run in atomic context and must not sleep.
 */
2937 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2940 struct neigh_hash_table *nht;
2943 nht = rcu_dereference_bh(tbl->nht);
2945 read_lock(&tbl->lock); /* avoid resizes */
2946 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2947 struct neighbour *n;
2949 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2951 n = rcu_dereference_bh(n->next))
2954 read_unlock(&tbl->lock);
2955 rcu_read_unlock_bh();
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain and let @cb decide, per entry, whether the
 * entry should be unlinked and released.  Unlinking splices n->next
 * over *np under the write-held table lock; the final release goes
 * through neigh_cleanup_and_release() so RCU readers stay safe.
 */
2960 void __neigh_for_each_release(struct neigh_table *tbl,
2961 int (*cb)(struct neighbour *))
2964 struct neigh_hash_table *nht;
2966 nht = rcu_dereference_protected(tbl->nht,
2967 lockdep_is_held(&tbl->lock));
2968 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2969 struct neighbour *n;
2970 struct neighbour __rcu **np;
2972 np = &nht->hash_buckets[chain];
2973 while ((n = rcu_dereference_protected(*np,
2974 lockdep_is_held(&tbl->lock))) != NULL) {
/* Per-entry lock guards n's own state while we unlink it. */
2977 write_lock(&n->lock);
2980 rcu_assign_pointer(*np,
2981 rcu_dereference_protected(n->next,
2982 lockdep_is_held(&tbl->lock)));
2986 write_unlock(&n->lock);
2988 neigh_cleanup_and_release(n);
/* Transmit @skb to @addr via @dev using the neighbour table selected
 * by @index (NEIGH_ARP_TABLE, NEIGH_ND_TABLE, ...).  Looks the entry
 * up under RCU-bh, creating it on demand with __neigh_create(), and
 * hands the skb to neigh->output().  NEIGH_LINK_TABLE bypasses
 * neighbour resolution entirely: the link-layer header is built
 * directly and the skb queued with dev_queue_xmit().
 * Returns 0 or a negative errno (-EAFNOSUPPORT for a bad index).
 */
2994 int neigh_xmit(int index, struct net_device *dev,
2995 const void *addr, struct sk_buff *skb)
2997 int err = -EAFNOSUPPORT;
2998 if (likely(index < NEIGH_NR_TABLES)) {
2999 struct neigh_table *tbl;
3000 struct neighbour *neigh;
3002 tbl = neigh_tables[index];
/* ARP keys are 4 bytes; use the specialised IPv4 fast lookup. */
3006 if (index == NEIGH_ARP_TABLE) {
3007 u32 key = *((u32 *)addr);
3009 neigh = __ipv4_neigh_lookup_noref(dev, key);
3011 neigh = __neigh_lookup_noref(tbl, addr, dev);
/* Miss: create the entry (want_ref=false, we are inside RCU). */
3014 neigh = __neigh_create(tbl, addr, dev, false);
3015 err = PTR_ERR(neigh);
3016 if (IS_ERR(neigh)) {
3017 rcu_read_unlock_bh();
3020 err = neigh->output(neigh, skb);
3021 rcu_read_unlock_bh();
3023 else if (index == NEIGH_LINK_TABLE) {
3024 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3025 addr, NULL, skb->len);
3028 err = dev_queue_xmit(skb);
3038 #ifdef CONFIG_PROC_FS
/* /proc seq helper: return the first neighbour visible to this
 * iteration — first non-empty bucket, filtered by netns, by the
 * optional per-protocol sub-iterator, and by NEIGH_SEQ_SKIP_NOARP.
 * Records the bucket reached in state->bucket for neigh_get_next().
 */
3040 static struct neighbour *neigh_get_first(struct seq_file *seq)
3042 struct neigh_seq_state *state = seq->private;
3043 struct net *net = seq_file_net(seq);
3044 struct neigh_hash_table *nht = state->nht;
3045 struct neighbour *n = NULL;
3048 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3049 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3050 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
3053 if (!net_eq(dev_net(n->dev), net))
3055 if (state->neigh_sub_iter) {
3059 v = state->neigh_sub_iter(state, n, &fakep);
3063 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3065 if (n->nud_state & ~NUD_NOARP)
3068 n = rcu_dereference_bh(n->next);
3074 state->bucket = bucket;
/* /proc seq helper: advance from @n to the next matching neighbour,
 * applying the same netns / sub-iterator / SKIP_NOARP filtering as
 * neigh_get_first() and moving on to the next bucket when the
 * current chain is exhausted.
 */
3079 static struct neighbour *neigh_get_next(struct seq_file *seq,
3080 struct neighbour *n,
3083 struct neigh_seq_state *state = seq->private;
3084 struct net *net = seq_file_net(seq);
3085 struct neigh_hash_table *nht = state->nht;
3087 if (state->neigh_sub_iter) {
3088 void *v = state->neigh_sub_iter(state, n, pos);
3092 n = rcu_dereference_bh(n->next);
3096 if (!net_eq(dev_net(n->dev), net))
3098 if (state->neigh_sub_iter) {
3099 void *v = state->neigh_sub_iter(state, n, pos);
3104 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3107 if (n->nud_state & ~NUD_NOARP)
3110 n = rcu_dereference_bh(n->next);
/* Chain exhausted: step to the next hash bucket. */
3116 if (++state->bucket >= (1 << nht->hash_shift))
3119 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* Position the neighbour iterator at offset *pos, decrementing *pos
 * as entries are skipped; returns NULL if *pos lies past the table
 * (leftover *pos non-zero means "not found in this table").
 */
3127 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3129 struct neighbour *n = neigh_get_first(seq);
3134 n = neigh_get_next(seq, n, pos);
3139 return *pos ? NULL : n;
/* /proc seq helper: first proxy entry in the first non-empty
 * phash bucket belonging to this netns.  Also flips the iterator
 * into proxy mode via NEIGH_SEQ_IS_PNEIGH so neigh_seq_next()
 * knows which table it is walking.
 */
3142 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3144 struct neigh_seq_state *state = seq->private;
3145 struct net *net = seq_file_net(seq);
3146 struct neigh_table *tbl = state->tbl;
3147 struct pneigh_entry *pn = NULL;
3148 int bucket = state->bucket;
3150 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3151 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3152 pn = tbl->phash_buckets[bucket];
3153 while (pn && !net_eq(pneigh_net(pn), net))
3158 state->bucket = bucket;
/* /proc seq helper: next proxy entry after @pn in iteration order,
 * skipping entries from other network namespaces and advancing
 * through phash buckets as needed.
 */
3163 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3164 struct pneigh_entry *pn,
3167 struct neigh_seq_state *state = seq->private;
3168 struct net *net = seq_file_net(seq);
3169 struct neigh_table *tbl = state->tbl;
3173 } while (pn && !net_eq(pneigh_net(pn), net));
3176 if (++state->bucket > PNEIGH_HASHMASK)
3178 pn = tbl->phash_buckets[state->bucket];
3179 while (pn && !net_eq(pneigh_net(pn), net))
/* Proxy-table counterpart of neigh_get_idx(): position the iterator
 * at offset *pos within the pneigh list, NULL if past the end.
 */
3191 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3193 struct pneigh_entry *pn = pneigh_get_first(seq);
3198 pn = pneigh_get_next(seq, pn, pos);
3203 return *pos ? NULL : pn;
/* Seek to position *pos across both tables: regular neighbours
 * first, then — unless NEIGH_SEQ_NEIGH_ONLY — the proxy entries,
 * using the *pos remainder left over by neigh_get_idx().
 */
3206 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3208 struct neigh_seq_state *state = seq->private;
3210 loff_t idxpos = *pos;
3212 rc = neigh_get_idx(seq, &idxpos);
3213 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3214 rc = pneigh_get_idx(seq, &idxpos);
/* seq_file ->start for per-protocol /proc neighbour listings
 * (e.g. /proc/net/arp).  Enters RCU-bh and takes tbl->lock for
 * reading; both are dropped in neigh_seq_stop().  Returns
 * SEQ_START_TOKEN for the header line at *pos == 0.
 */
3219 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3220 __acquires(tbl->lock)
3223 struct neigh_seq_state *state = seq->private;
3227 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3230 state->nht = rcu_dereference_bh(tbl->nht);
3231 read_lock(&tbl->lock);
3233 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
/* seq_file ->next: advance one entry, crossing over from the
 * neighbour table to the proxy table when the former runs out
 * (NEIGH_SEQ_IS_PNEIGH tracks which table @v came from).
 */
3237 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3239 struct neigh_seq_state *state;
3242 if (v == SEQ_START_TOKEN) {
3243 rc = neigh_get_first(seq);
3247 state = seq->private;
3248 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3249 rc = neigh_get_next(seq, v, NULL);
3252 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3253 rc = pneigh_get_first(seq);
/* Already in proxy mode: NEIGH_ONLY iterators can't get here. */
3255 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3256 rc = pneigh_get_next(seq, v, NULL);
/* seq_file ->stop: release the table read lock and RCU-bh taken in
 * neigh_seq_start(), in reverse acquisition order.
 */
3264 void neigh_seq_stop(struct seq_file *seq, void *v)
3265 __releases(tbl->lock)
3268 struct neigh_seq_state *state = seq->private;
3269 struct neigh_table *tbl = state->tbl;
3271 read_unlock(&tbl->lock);
3272 rcu_read_unlock_bh();
3276 /* statistics via seq_file */
/* seq_file ->start for the per-table stats file: SEQ_START_TOKEN for
 * the header row, then the per-cpu stats of the (*pos-1)'th possible
 * CPU.  *pos is offset by one to make room for the header token.
 */
3278 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3280 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3284 return SEQ_START_TOKEN;
3286 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3287 if (!cpu_possible(cpu))
3290 return per_cpu_ptr(tbl->stats, cpu);
/* seq_file ->next: step to the next possible CPU's stats block,
 * skipping CPU ids that are not possible on this system.
 */
3295 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3297 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3300 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3301 if (!cpu_possible(cpu))
3304 return per_cpu_ptr(tbl->stats, cpu);
3310 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* seq_file ->show: print the column header for SEQ_START_TOKEN,
 * otherwise one row of per-cpu neigh_statistics counters in the
 * fixed-width hex format consumed by tools parsing this file.
 */
3315 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3317 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3318 struct neigh_statistics *st = v;
3320 if (v == SEQ_START_TOKEN) {
3321 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3325 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3326 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3327 atomic_read(&tbl->entries),
3338 st->rcv_probes_mcast,
3339 st->rcv_probes_ucast,
3341 st->periodic_gc_runs,
/* seq_operations for /proc/net/stat/<table> — wired up by the proc
 * registration elsewhere in this file (forward-declared in HEAD).
 */
3350 static const struct seq_operations neigh_stat_seq_ops = {
3351 .start = neigh_stat_seq_start,
3352 .next = neigh_stat_seq_next,
3353 .stop = neigh_stat_seq_stop,
3354 .show = neigh_stat_seq_show,
3356 #endif /* CONFIG_PROC_FS */
/* Broadcast a neighbour event of @type to RTNLGRP_NEIGH listeners.
 * GFP_ATOMIC throughout — callers may hold locks / run in softirq.
 * On failure the group error is recorded via rtnl_set_sk_err().
 */
3358 static void __neigh_notify(struct neighbour *n, int type, int flags,
3361 struct net *net = dev_net(n->dev);
3362 struct sk_buff *skb;
3365 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3369 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3371 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3372 WARN_ON(err == -EMSGSIZE);
3376 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3380 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace resolvers for help: emit RTM_GETNEIGH with
 * NLM_F_REQUEST so an app-level daemon can resolve @n (used when
 * app_probes is configured).
 */
3383 void neigh_app_ns(struct neighbour *n)
3385 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3389 #ifdef CONFIG_SYSCTL
3390 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* sysctl handler for the legacy packet-count "unres_qlen" knob.
 * The backing storage is unres_qlen_bytes; this shim converts
 * bytes <-> packets using SKB_TRUESIZE(ETH_FRAME_LEN) as the
 * per-packet cost, clamped to [0, unres_qlen_max] via a tmp table.
 */
3392 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3393 void __user *buffer, size_t *lenp, loff_t *ppos)
3396 struct ctl_table tmp = *ctl;
3398 tmp.extra1 = SYSCTL_ZERO;
3399 tmp.extra2 = &unres_qlen_max;
3402 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3403 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3406 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Return @dev's per-device neigh_parms for the given family
 * (ARP for IPv4, ND for IPv6); caller must be in an RCU section.
 */
3410 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3415 return __in_dev_arp_parms_get_rcu(dev);
3417 return __in6_dev_nd_parms_get_rcu(dev);
/* Propagate a changed default parameter @p->data[index] to every
 * device in @net that has NOT explicitly overridden it (tracked by
 * the per-index bit in dst_p->data_state).
 */
3422 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3425 struct net_device *dev;
3426 int family = neigh_parms_family(p);
3429 for_each_netdev_rcu(net, dev) {
3430 struct neigh_parms *dst_p =
3431 neigh_get_dev_parms_rcu(dev, family);
3433 if (dst_p && !test_bit(index, dst_p->data_state))
3434 dst_p->data[index] = p->data[index];
/* Common post-write bookkeeping for all neigh sysctl handlers:
 * mark the parameter as explicitly set, fire the netevent when
 * delay_first_probe_time changes (drivers may cache it), and — for
 * the "default" table (ctl->extra1 == NULL) — push the new value to
 * all devices that still track the default.
 */
3439 static void neigh_proc_update(struct ctl_table *ctl, int write)
3441 struct net_device *dev = ctl->extra1;
3442 struct neigh_parms *p = ctl->extra2;
3443 struct net *net = neigh_parms_net(p);
/* Recover the NEIGH_VAR_* index from the data pointer's offset. */
3444 int index = (int *) ctl->data - p->data;
3449 set_bit(index, p->data_state);
3450 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3451 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3452 if (!dev) /* NULL dev means this is default value */
3453 neigh_copy_dflt_parms(net, p, index);
/* proc_dointvec_minmax clamped to [0, INT_MAX] (bounds supplied via
 * a stack copy of the table so the shared template stays const),
 * followed by the common neigh_proc_update() propagation.
 */
3456 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3457 void __user *buffer,
3458 size_t *lenp, loff_t *ppos)
3460 struct ctl_table tmp = *ctl;
3463 tmp.extra1 = SYSCTL_ZERO;
3464 tmp.extra2 = SYSCTL_INT_MAX;
3466 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3467 neigh_proc_update(ctl, write);
/* Plain proc_dointvec plus neigh_proc_update() propagation. */
3471 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3472 void __user *buffer, size_t *lenp, loff_t *ppos)
3474 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3476 neigh_proc_update(ctl, write);
/* proc_dointvec_jiffies (value in seconds, stored as jiffies) plus
 * neigh_proc_update() propagation.
 */
3481 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3482 void __user *buffer,
3483 size_t *lenp, loff_t *ppos)
3485 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3487 neigh_proc_update(ctl, write);
/* proc_dointvec_userhz_jiffies (value in USER_HZ ticks) plus
 * neigh_proc_update() propagation.
 */
3492 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3493 void __user *buffer,
3494 size_t *lenp, loff_t *ppos)
3496 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3498 neigh_proc_update(ctl, write);
/* proc_dointvec_ms_jiffies (value in milliseconds) plus
 * neigh_proc_update() propagation.
 */
3502 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3503 void __user *buffer,
3504 size_t *lenp, loff_t *ppos)
3506 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3508 neigh_proc_update(ctl, write);
/* Legacy packet-count unres_qlen handler (see proc_unres_qlen for
 * the bytes<->packets conversion) plus neigh_proc_update().
 */
3513 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3514 void __user *buffer,
3515 size_t *lenp, loff_t *ppos)
3517 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3519 neigh_proc_update(ctl, write);
/* Handler for both base_reachable_time (seconds) and
 * base_reachable_time_ms (milliseconds): dispatch on procname to
 * the matching jiffies conversion, then, after a successful write,
 * immediately re-randomize p->reachable_time so the new setting
 * takes effect without waiting for neigh_periodic_work.
 */
3523 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3524 void __user *buffer,
3525 size_t *lenp, loff_t *ppos)
3527 struct neigh_parms *p = ctl->extra2;
3530 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3531 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3532 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3533 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3537 if (write && ret == 0) {
3538 /* update reachable_time as well, otherwise, the change will
3539 * only be effective after the next time neigh_periodic_work
3540 * decides to recompute it
3543 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* NEIGH_PARMS_DATA_OFFSET yields the offset of data[index] within
 * struct neigh_parms as a fake pointer (offsetof-style null-pointer
 * arithmetic); neigh_sysctl_register() later rebases it with
 * "+ (long) p" to point into the real parms instance.
 */
3548 #define NEIGH_PARMS_DATA_OFFSET(index) \
3549 (&((struct neigh_parms *) 0)->data[index])
/* Build one ctl_table slot for NEIGH_VAR_<attr>; data_attr may
 * differ from attr for legacy aliases that share backing storage
 * (e.g. unres_qlen -> unres_qlen_bytes).
 */
3551 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3552 [NEIGH_VAR_ ## attr] = { \
3554 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3555 .maxlen = sizeof(int), \
3557 .proc_handler = proc, \
3560 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3561 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3563 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3564 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3566 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3567 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3569 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3570 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
/* "REUSED" variants alias a different backing variable (data_attr)
 * so old and new knob names stay coherent.
 */
3572 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3573 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3575 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3576 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/* Template sysctl table, kmemdup'ed per registration by
 * neigh_sysctl_register(), which rebases the .data offsets onto the
 * actual neigh_parms.  Per-parms entries come first; the GC_* slots
 * at the tail are table-wide and only kept for the "default" copy.
 */
3578 static struct neigh_sysctl_table {
3579 struct ctl_table_header *sysctl_header;
3580 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3581 } neigh_sysctl_template __read_mostly = {
3583 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3584 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3585 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3586 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3587 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3588 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3589 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3590 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3591 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3592 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3593 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3594 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3595 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* Legacy aliases sharing storage with the entries above. */
3596 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3597 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3598 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
/* Table-wide GC knobs; .data is patched in for the default table
 * and these slots are zeroed out for per-device registrations.
 */
3599 [NEIGH_VAR_GC_INTERVAL] = {
3600 .procname = "gc_interval",
3601 .maxlen = sizeof(int),
3603 .proc_handler = proc_dointvec_jiffies,
3605 [NEIGH_VAR_GC_THRESH1] = {
3606 .procname = "gc_thresh1",
3607 .maxlen = sizeof(int),
3609 .extra1 = SYSCTL_ZERO,
3610 .extra2 = SYSCTL_INT_MAX,
3611 .proc_handler = proc_dointvec_minmax,
3613 [NEIGH_VAR_GC_THRESH2] = {
3614 .procname = "gc_thresh2",
3615 .maxlen = sizeof(int),
3617 .extra1 = SYSCTL_ZERO,
3618 .extra2 = SYSCTL_INT_MAX,
3619 .proc_handler = proc_dointvec_minmax,
3621 [NEIGH_VAR_GC_THRESH3] = {
3622 .procname = "gc_thresh3",
3623 .maxlen = sizeof(int),
3625 .extra1 = SYSCTL_ZERO,
3626 .extra2 = SYSCTL_INT_MAX,
3627 .proc_handler = proc_dointvec_minmax,
/* Register the sysctl tree net/<proto>/neigh/<dev|default> for @p.
 * Duplicates neigh_sysctl_template, rebases each per-parms .data
 * offset onto @p, and either truncates the table before the GC
 * entries (per-device case) or points them at the table-wide
 * gc_* fields (default case).  @handler, when given, overrides the
 * time-related proc handlers (protocol-specific conversion).
 * Returns 0 on success, negative errno otherwise.
 */
3633 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3634 proc_handler *handler)
3637 struct neigh_sysctl_table *t;
3638 const char *dev_name_source;
3639 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3642 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
/* Rebase template offsets onto this parms instance and stash the
 * dev/parms pointers where neigh_proc_update() expects them.
 */
3646 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3647 t->neigh_vars[i].data += (long) p;
3648 t->neigh_vars[i].extra1 = dev;
3649 t->neigh_vars[i].extra2 = p;
3653 dev_name_source = dev->name;
3654 /* Terminate the table early */
3655 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3656 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3658 struct neigh_table *tbl = p->tbl;
3659 dev_name_source = "default";
3660 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3661 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3662 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3663 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
/* Caller-provided handler takes over the time conversions. */
3668 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3670 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3671 /* RetransTime (in milliseconds)*/
3672 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3673 /* ReachableTime (in milliseconds) */
3674 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3676 /* Those handlers will update p->reachable_time after
3677 * base_reachable_time(_ms) is set to ensure the new timer starts being
3678 * applied after the next neighbour update instead of waiting for
3679 * neigh_periodic_work to update its value (can be multiple minutes)
3680 * So any handler that replaces them should do this as well
3683 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3684 neigh_proc_base_reachable_time;
3685 /* ReachableTime (in milliseconds) */
3686 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3687 neigh_proc_base_reachable_time;
3690 /* Don't export sysctls to unprivileged users */
3691 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3692 t->neigh_vars[0].procname = NULL;
3694 switch (neigh_parms_family(p)) {
3705 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3706 p_name, dev_name_source);
3708 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3709 if (!t->sysctl_header)
3712 p->sysctl_table = t;
/* Tear down the sysctl tree created by neigh_sysctl_register().
 * p->sysctl_table is cleared before unregistering — presumably so
 * concurrent observers see a consistent NULL; TODO confirm the
 * ordering requirement against the registration path.
 */
3722 void neigh_sysctl_unregister(struct neigh_parms *p)
3724 if (p->sysctl_table) {
3725 struct neigh_sysctl_table *t = p->sysctl_table;
3726 p->sysctl_table = NULL;
3727 unregister_net_sysctl_table(t->sysctl_header);
3733 #endif /* CONFIG_SYSCTL */
/* Module init: wire the PF_UNSPEC neighbour rtnetlink message types
 * to their doit/dumpit handlers defined earlier in this file.
 */
3735 static int __init neigh_init(void)
3737 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3738 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3739 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3741 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3743 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3748 subsys_initcall(neigh_init);