1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Generic address resolution entity
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 #include <linux/slab.h>
17 #include <linux/kmemleak.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
25 #include <linux/sysctl.h>
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
42 #include <trace/events/neigh.h>
46 #define neigh_dbg(level, fmt, ...) \
48 if (level <= NEIGH_DEBUG) \
49 pr_debug(fmt, ##__VA_ARGS__); \
52 #define PNEIGH_HASHMASK 0xF
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 struct net_device *dev);
62 static const struct seq_operations neigh_stat_seq_ops;
66 Neighbour hash table buckets are protected with rwlock tbl->lock.
68 - All the scans/updates to hash buckets MUST be made under this lock.
69 - NOTHING clever should be made under this lock: no callbacks
70 to protocol backends, no attempts to send something to network.
71 It will result in deadlocks, if backend/driver wants to use neighbour
73 - If the entry requires some non-trivial actions, increase
74 its reference count and release table lock.
76 Neighbour entries are protected:
77 - with reference count.
78 - with rwlock neigh->lock
80 Reference count prevents destruction.
82 neigh->lock mainly serializes ll address data and its validity state.
83 However, the same lock is used to protect another entry fields:
87 Again, nothing clever shall be made under neigh->lock,
88 the most complicated procedure, which we allow is dev->hard_header.
89 It is supposed, that dev->hard_header is simplistic and does
90 not make callbacks to neighbour tables.
/* Packet sink installed as n->output for dead/unresolvable neighbours so
 * queued packets are discarded instead of transmitted.  Body elided in
 * this extract -- presumably frees the skb and returns an error code.
 */
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Tear-down notifications for a neighbour leaving the table: emit the
 * tracepoint, send RTM_DELNEIGH to rtnetlink listeners, fire the
 * NETEVENT_NEIGH_UPDATE netevent, then drop the caller's reference.
 */
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
101 trace_neigh_cleanup_and_release(neigh, 0);
102 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
103 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
104 neigh_release(neigh);
108 * It is random distribution in the interval (1/2)*base...(3/2)*base.
109 * It corresponds to default IPv6 settings and is not overridable,
110 * because it is really reasonable choice.
/* Returns a value uniformly distributed in [base/2, 3*base/2); the
 * base == 0 guard avoids the division by zero in (prandom_u32() % base).
 */
113 unsigned long neigh_rand_reach_time(unsigned long base)
115 return base ? (prandom_u32() % base) + (base >> 1) : 0;
117 EXPORT_SYMBOL(neigh_rand_reach_time);
/* Part of marking a neighbour dead: detach @n from its table's gc list
 * (if queued) and drop the count of gc-managed entries.  NOTE(review):
 * the dead-flag assignment and any lock assertions are elided from this
 * extract.
 */
119 static void neigh_mark_dead(struct neighbour *n)
122 if (!list_empty(&n->gc_list)) {
123 list_del_init(&n->gc_list);
124 atomic_dec(&n->tbl->gc_entries);
/* Re-evaluate whether @n belongs on the table's gc list after a state or
 * flags change: NUD_PERMANENT and externally learned (NTF_EXT_LEARNED)
 * entries are exempt from garbage collection and are removed; all others
 * are appended to the tail (the collector consumes from the front).
 * Locks are taken in table-then-neighbour order.
 */
128 static void neigh_update_gc_list(struct neighbour *n)
130 bool on_gc_list, exempt_from_gc;
132 write_lock_bh(&n->tbl->lock);
133 write_lock(&n->lock);
138 /* remove from the gc list if new state is permanent or if neighbor
139 * is externally learned; otherwise entry should be on the gc list
141 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142 n->flags & NTF_EXT_LEARNED;
143 on_gc_list = !list_empty(&n->gc_list);
145 if (exempt_from_gc && on_gc_list) {
146 list_del_init(&n->gc_list);
147 atomic_dec(&n->tbl->gc_entries);
148 } else if (!exempt_from_gc && !on_gc_list) {
149 /* add entries to the tail; cleaning removes from the front */
150 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151 atomic_inc(&n->tbl->gc_entries);
155 write_unlock(&n->lock);
156 write_unlock_bh(&n->tbl->lock);
/* Admin-only helper: bring NTF_EXT_LEARNED on @neigh in sync with the
 * NEIGH_UPDATE_F_EXT_LEARNED bit of @flags.  Does nothing unless
 * NEIGH_UPDATE_F_ADMIN is set.  The return plumbing (reporting whether
 * the flag actually changed) is elided from this extract.
 */
159 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
165 if (!(flags & NEIGH_UPDATE_F_ADMIN))
168 ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
169 if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
170 if (ndm_flags & NTF_EXT_LEARNED)
171 neigh->flags |= NTF_EXT_LEARNED;
173 neigh->flags &= ~NTF_EXT_LEARNED;
/* Unlink @n from its hash chain slot @np if nobody else holds a reference
 * (refcnt == 1), then notify listeners and release it via
 * neigh_cleanup_and_release().  Caller holds tbl->lock; n->lock is taken
 * around the refcount test and unlink.
 */
181 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
182 struct neigh_table *tbl)
186 write_lock(&n->lock);
187 if (refcount_read(&n->refcnt) == 1) {
188 struct neighbour *neigh;
190 neigh = rcu_dereference_protected(n->next,
191 lockdep_is_held(&tbl->lock));
192 rcu_assign_pointer(*np, neigh);
196 write_unlock(&n->lock);
198 neigh_cleanup_and_release(n);
/* Locate @ndel in its hash bucket (tbl->lock held by the caller, per the
 * lockdep annotations) and attempt deletion through neigh_del().
 */
202 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
204 struct neigh_hash_table *nht;
205 void *pkey = ndel->primary_key;
208 struct neighbour __rcu **np;
210 nht = rcu_dereference_protected(tbl->nht,
211 lockdep_is_held(&tbl->lock));
212 hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
213 hash_val = hash_val >> (32 - nht->hash_shift);
215 np = &nht->hash_buckets[hash_val];
216 while ((n = rcu_dereference_protected(*np,
217 lockdep_is_held(&tbl->lock)))) {
219 return neigh_del(n, np, tbl);
/* Synchronous garbage collection when the table is over pressure: walk
 * the gc list and delete unreferenced entries that are NUD_FAILED,
 * NUD_NOARP, multicast, or not updated within the last five seconds
 * (tref), aiming to get back under gc_thresh2 (max_clean).  The shrunk
 * count / return statement and loop exits are elided from this extract.
 */
225 static int neigh_forced_gc(struct neigh_table *tbl)
227 int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
228 unsigned long tref = jiffies - 5 * HZ;
229 struct neighbour *n, *tmp;
232 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
234 write_lock_bh(&tbl->lock);
236 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
237 if (refcount_read(&n->refcnt) == 1) {
240 write_lock(&n->lock);
241 if ((n->nud_state == NUD_FAILED) ||
242 (n->nud_state == NUD_NOARP) ||
243 (tbl->is_multicast &&
244 tbl->is_multicast(n->primary_key)) ||
245 !time_in_range(n->updated, tref, jiffies))
247 write_unlock(&n->lock);
249 if (remove && neigh_remove_one(n, tbl))
251 if (shrunk >= max_clean)
256 tbl->last_flush = jiffies;
258 write_unlock_bh(&tbl->lock);
/* Arm @n's state-machine timer for @when.  confirmed/used are first
 * clamped into a sane window (see the comment below) so user space is
 * not shown absurd timestamps.  A timer already pending here is a bug
 * and is logged loudly; the printk's argument line is elided in this
 * extract.
 */
263 static void neigh_add_timer(struct neighbour *n, unsigned long when)
265 /* Use safe distance from the jiffies - LONG_MAX point while timer
266 * is running in DELAY/PROBE state but still show to user space
267 * large times in the past.
269 unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
272 if (!time_in_range(n->confirmed, mint, jiffies))
274 if (time_before(n->used, n->confirmed))
275 n->used = n->confirmed;
276 if (unlikely(mod_timer(&n->timer, when))) {
277 printk("NEIGH: BUG, double timer add, state is %x\n",
/* Cancel a pending state-machine timer, if any.  The success-path body
 * (presumably dropping the timer's reference and returning nonzero) is
 * elided from this extract.
 */
283 static int neigh_del_timer(struct neighbour *n)
285 if ((n->nud_state & NUD_IN_TIMER) &&
286 del_timer(&n->timer)) {
/* Drop proxy skbs queued on @list that belong to @net (all skbs when
 * @net is NULL).  Matching skbs are moved onto a private list while the
 * queue lock is held with IRQs off, then released outside the lock
 * (the free call inside the final loop is elided from this extract).
 */
293 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
295 struct sk_buff_head tmp;
299 skb_queue_head_init(&tmp);
300 spin_lock_irqsave(&list->lock, flags);
301 skb = skb_peek(list);
302 while (skb != NULL) {
303 struct sk_buff *skb_next = skb_peek_next(skb, list);
304 if (net == NULL || net_eq(dev_net(skb->dev), net)) {
305 __skb_unlink(skb, list);
306 __skb_queue_tail(&tmp, skb);
310 spin_unlock_irqrestore(&list->lock, flags);
312 while ((skb = __skb_dequeue(&tmp))) {
/* Remove every cached entry for @dev (all devices when @dev is NULL)
 * from the hash table, keeping NUD_PERMANENT entries when @skip_perm.
 * Entries still referenced elsewhere cannot be freed, so they are
 * neutered instead: arp_queue purged, output redirected to
 * neigh_blackhole, state demoted to NOARP (if it was VALID) or NONE.
 * Caller holds tbl->lock, per the lockdep annotations.
 */
318 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
322 struct neigh_hash_table *nht;
324 nht = rcu_dereference_protected(tbl->nht,
325 lockdep_is_held(&tbl->lock));
327 for (i = 0; i < (1 << nht->hash_shift); i++) {
329 struct neighbour __rcu **np = &nht->hash_buckets[i];
331 while ((n = rcu_dereference_protected(*np,
332 lockdep_is_held(&tbl->lock))) != NULL) {
333 if (dev && n->dev != dev) {
337 if (skip_perm && n->nud_state & NUD_PERMANENT) {
341 rcu_assign_pointer(*np,
342 rcu_dereference_protected(n->next,
343 lockdep_is_held(&tbl->lock)));
344 write_lock(&n->lock);
347 if (refcount_read(&n->refcnt) != 1) {
348 /* The most unpleasant situation.
349 We must destroy neighbour entry,
350 but someone still uses it.
352 The destroy will be delayed until
353 the last user releases us, but
354 we must kill timers etc. and move
357 __skb_queue_purge(&n->arp_queue);
358 n->arp_queue_len_bytes = 0;
359 n->output = neigh_blackhole;
360 if (n->nud_state & NUD_VALID)
361 n->nud_state = NUD_NOARP;
363 n->nud_state = NUD_NONE;
364 neigh_dbg(2, "neigh %p is stray\n", n);
366 write_unlock(&n->lock);
367 neigh_cleanup_and_release(n);
/* A device's hardware address changed: flush all of its neighbour
 * entries (including permanent ones -- skip_perm is false) under the
 * table lock.
 */
372 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
374 write_lock_bh(&tbl->lock);
375 neigh_flush_dev(tbl, dev, false);
376 write_unlock_bh(&tbl->lock);
378 EXPORT_SYMBOL(neigh_changeaddr);
/* Common teardown for carrier-down and interface-down: flush the
 * device's neighbours (optionally sparing NUD_PERMANENT entries), drop
 * its proxy entries (pneigh_ifdown_and_unlock also releases tbl->lock),
 * purge queued proxy skbs for the device's netns, and stop the proxy
 * timer once its queue is empty.
 */
380 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
383 write_lock_bh(&tbl->lock);
384 neigh_flush_dev(tbl, dev, skip_perm);
385 pneigh_ifdown_and_unlock(tbl, dev);
386 pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
387 if (skb_queue_empty_lockless(&tbl->proxy_queue))
388 del_timer_sync(&tbl->proxy_timer);
/* Carrier loss: flush the device's entries but keep NUD_PERMANENT ones
 * (skip_perm == true).
 */
392 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
394 __neigh_ifdown(tbl, dev, true);
397 EXPORT_SYMBOL(neigh_carrier_down);
/* Interface going down: flush everything for the device, permanent
 * entries included (skip_perm == false).
 */
399 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
401 __neigh_ifdown(tbl, dev, false);
404 EXPORT_SYMBOL(neigh_ifdown);
/* Allocate and initialise a new neighbour entry for @tbl/@dev.  If the
 * table is over gc_thresh3 -- or over gc_thresh2 with no flush in the
 * last 5 seconds -- a forced GC runs first; when it cannot make room the
 * allocation is refused and the table_fulls statistic is bumped.  A new
 * entry starts in NUD_NONE with output = neigh_blackhole and one
 * reference held.  The exempt_from_gc branching and the error/unwind
 * labels are partly elided from this extract.
 */
406 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
407 struct net_device *dev,
408 u8 flags, bool exempt_from_gc)
410 struct neighbour *n = NULL;
411 unsigned long now = jiffies;
417 entries = atomic_inc_return(&tbl->gc_entries) - 1;
418 if (entries >= tbl->gc_thresh3 ||
419 (entries >= tbl->gc_thresh2 &&
420 time_after(now, tbl->last_flush + 5 * HZ))) {
421 if (!neigh_forced_gc(tbl) &&
422 entries >= tbl->gc_thresh3) {
423 net_info_ratelimited("%s: neighbor table overflow!\n",
425 NEIGH_CACHE_STAT_INC(tbl, table_fulls);
431 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
435 __skb_queue_head_init(&n->arp_queue);
436 rwlock_init(&n->lock);
437 seqlock_init(&n->ha_lock);
438 n->updated = n->used = now;
439 n->nud_state = NUD_NONE;
440 n->output = neigh_blackhole;
442 seqlock_init(&n->hh.hh_lock);
443 n->parms = neigh_parms_clone(&tbl->parms);
444 timer_setup(&n->timer, neigh_timer_handler, 0);
446 NEIGH_CACHE_STAT_INC(tbl, allocs);
448 refcount_set(&n->refcnt, 1);
450 INIT_LIST_HEAD(&n->gc_list);
452 atomic_inc(&tbl->entries);
458 atomic_dec(&tbl->gc_entries);
/* Fill *x with a random hash seed; OR-ing in 1 guarantees it is odd and
 * therefore never zero.
 */
462 static void neigh_get_hash_rnd(u32 *x)
464 *x = get_random_u32() | 1;
/* Allocate a hash table with 2^shift buckets plus fresh hash seeds.
 * Bucket arrays up to a page come from kzalloc(); larger ones come from
 * whole zeroed pages, which are registered with kmemleak manually since
 * page allocations are not tracked automatically.  Failure paths are
 * elided from this extract.
 */
467 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
469 size_t size = (1 << shift) * sizeof(struct neighbour *);
470 struct neigh_hash_table *ret;
471 struct neighbour __rcu **buckets;
474 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
477 if (size <= PAGE_SIZE) {
478 buckets = kzalloc(size, GFP_ATOMIC);
480 buckets = (struct neighbour __rcu **)
481 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
483 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
489 ret->hash_buckets = buckets;
490 ret->hash_shift = shift;
491 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
492 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback that frees a retired hash table after the grace period,
 * mirroring the kzalloc-vs-pages split of neigh_hash_alloc() (the
 * kfree branches are elided from this extract).
 */
496 static void neigh_hash_free_rcu(struct rcu_head *head)
498 struct neigh_hash_table *nht = container_of(head,
499 struct neigh_hash_table,
501 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
502 struct neighbour __rcu **buckets = nht->hash_buckets;
504 if (size <= PAGE_SIZE) {
507 kmemleak_free(buckets);
508 free_pages((unsigned long)buckets, get_order(size));
/* Replace the table's hash with one of 2^new_shift buckets, rehashing
 * every entry under tbl->lock.  Each entry is pushed onto the head of
 * its new chain via rcu_assign_pointer so concurrent RCU readers always
 * see a consistent list.  The new table is published with
 * rcu_assign_pointer and the old one freed after a grace period.
 * The allocation-failure early return is elided from this extract.
 */
513 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
514 unsigned long new_shift)
516 unsigned int i, hash;
517 struct neigh_hash_table *new_nht, *old_nht;
519 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
521 old_nht = rcu_dereference_protected(tbl->nht,
522 lockdep_is_held(&tbl->lock));
523 new_nht = neigh_hash_alloc(new_shift);
527 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
528 struct neighbour *n, *next;
530 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
531 lockdep_is_held(&tbl->lock));
534 hash = tbl->hash(n->primary_key, n->dev,
537 hash >>= (32 - new_nht->hash_shift);
538 next = rcu_dereference_protected(n->next,
539 lockdep_is_held(&tbl->lock));
541 rcu_assign_pointer(n->next,
542 rcu_dereference_protected(
543 new_nht->hash_buckets[hash],
544 lockdep_is_held(&tbl->lock)));
545 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
549 rcu_assign_pointer(tbl->nht, new_nht);
550 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* RCU lookup of (pkey, dev) in @tbl.  A hit is only returned when its
 * refcount can be raised from nonzero, which guards against entries
 * concurrently on their way to being freed.  The rcu_read_lock_bh()
 * entry and return statement are elided from this extract.
 */
554 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
555 struct net_device *dev)
559 NEIGH_CACHE_STAT_INC(tbl, lookups);
562 n = __neigh_lookup_noref(tbl, pkey, dev);
564 if (!refcount_inc_not_zero(&n->refcnt))
566 NEIGH_CACHE_STAT_INC(tbl, hits);
569 rcu_read_unlock_bh();
572 EXPORT_SYMBOL(neigh_lookup);
/* Slow-path creation of a neighbour entry: allocate via neigh_alloc(),
 * run the protocol constructor (tbl->constructor), the driver hook
 * (ndo_neigh_construct) and the parms neigh_setup hook, then insert
 * into the hash under tbl->lock -- growing the hash first when the
 * entry count exceeds the bucket count.  The bucket walk detects a
 * concurrent insert of the same (dev, key); dead parms abort with
 * -EINVAL.  Non-exempt entries are appended to the gc list.  Several
 * duplicate-found / error-unwind paths are elided from this extract.
 */
574 static struct neighbour *
575 ___neigh_create(struct neigh_table *tbl, const void *pkey,
576 struct net_device *dev, u8 flags,
577 bool exempt_from_gc, bool want_ref)
579 u32 hash_val, key_len = tbl->key_len;
580 struct neighbour *n1, *rc, *n;
581 struct neigh_hash_table *nht;
584 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
585 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
587 rc = ERR_PTR(-ENOBUFS);
591 memcpy(n->primary_key, pkey, key_len);
595 /* Protocol specific setup. */
596 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
598 goto out_neigh_release;
601 if (dev->netdev_ops->ndo_neigh_construct) {
602 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
605 goto out_neigh_release;
609 /* Device specific setup. */
610 if (n->parms->neigh_setup &&
611 (error = n->parms->neigh_setup(n)) < 0) {
613 goto out_neigh_release;
616 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
618 write_lock_bh(&tbl->lock);
619 nht = rcu_dereference_protected(tbl->nht,
620 lockdep_is_held(&tbl->lock));
622 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
623 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
625 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
627 if (n->parms->dead) {
628 rc = ERR_PTR(-EINVAL);
632 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
633 lockdep_is_held(&tbl->lock));
635 n1 = rcu_dereference_protected(n1->next,
636 lockdep_is_held(&tbl->lock))) {
637 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
647 list_add_tail(&n->gc_list, &n->tbl->gc_list);
651 rcu_assign_pointer(n->next,
652 rcu_dereference_protected(nht->hash_buckets[hash_val],
653 lockdep_is_held(&tbl->lock)));
654 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
655 write_unlock_bh(&tbl->lock);
656 neigh_dbg(2, "neigh %p is created\n", n);
661 write_unlock_bh(&tbl->lock);
664 atomic_dec(&tbl->gc_entries);
/* Public wrapper: create with no extra flags and subject to GC. */
669 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
670 struct net_device *dev, bool want_ref)
672 return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
674 EXPORT_SYMBOL(__neigh_create);
/* Hash a proxy key by folding its last four bytes down to
 * PNEIGH_HASHMASK (16 buckets).  NOTE(review): reads pkey + key_len - 4,
 * so it assumes key_len >= 4 -- confirm against the address families
 * that feed this table.
 */
676 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
678 u32 hash_val = *(u32 *)(pkey + key_len - 4);
679 hash_val ^= (hash_val >> 16);
680 hash_val ^= hash_val >> 8;
681 hash_val ^= hash_val >> 4;
682 hash_val &= PNEIGH_HASHMASK;
/* Walk one proxy hash chain for a key + netns match; an entry with a
 * NULL dev acts as a wildcard matching any device.  The list-advance
 * and return lines are elided from this extract.
 */
686 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
689 unsigned int key_len,
690 struct net_device *dev)
693 if (!memcmp(n->key, pkey, key_len) &&
694 net_eq(pneigh_net(n), net) &&
695 (n->dev == dev || !n->dev))
/* Chain lookup without taking any lock here -- callers are expected to
 * provide the exclusion (NOTE(review): presumably tbl->lock; not visible
 * in this extract).
 */
702 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
703 struct net *net, const void *pkey, struct net_device *dev)
705 unsigned int key_len = tbl->key_len;
706 u32 hash_val = pneigh_hash(pkey, key_len);
708 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
709 net, pkey, key_len, dev);
711 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* Look up a proxy entry, optionally (@creat) creating it on a miss.
 * The first probe runs under read_lock_bh(tbl->lock); creation uses
 * GFP_KERNEL (so this path may sleep), runs the table's pconstructor,
 * and links the entry into its bucket under write_lock_bh.  The
 * probe-then-create sequence drops the lock in between; the miss/creat
 * checks between the two phases are elided from this extract.
 */
713 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
714 struct net *net, const void *pkey,
715 struct net_device *dev, int creat)
717 struct pneigh_entry *n;
718 unsigned int key_len = tbl->key_len;
719 u32 hash_val = pneigh_hash(pkey, key_len);
721 read_lock_bh(&tbl->lock);
722 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
723 net, pkey, key_len, dev);
724 read_unlock_bh(&tbl->lock);
731 n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
735 write_pnet(&n->net, net);
736 memcpy(n->key, pkey, key_len);
741 if (tbl->pconstructor && tbl->pconstructor(n)) {
749 write_lock_bh(&tbl->lock);
750 n->next = tbl->phash_buckets[hash_val];
751 tbl->phash_buckets[hash_val] = n;
752 write_unlock_bh(&tbl->lock);
756 EXPORT_SYMBOL(pneigh_lookup);
/* Remove the proxy entry matching (net, pkey, dev) exactly -- note there
 * is no NULL-dev wildcard here, unlike the lookup path.  The unlink
 * happens under write_lock_bh; the pdestructor runs after the lock is
 * dropped.  The free and the not-found return are elided from this
 * extract.
 */
759 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
760 struct net_device *dev)
762 struct pneigh_entry *n, **np;
763 unsigned int key_len = tbl->key_len;
764 u32 hash_val = pneigh_hash(pkey, key_len);
766 write_lock_bh(&tbl->lock);
767 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
769 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
770 net_eq(pneigh_net(n), net)) {
772 write_unlock_bh(&tbl->lock);
773 if (tbl->pdestructor)
781 write_unlock_bh(&tbl->lock);
/* Called with tbl->lock write-held (and responsible for releasing it):
 * move every proxy entry for @dev -- or all entries when @dev is NULL --
 * onto a private freelist, drop the lock, then destroy the collected
 * entries outside it.  The freelist-link and free lines are elided from
 * this extract.
 */
785 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
786 struct net_device *dev)
788 struct pneigh_entry *n, **np, *freelist = NULL;
791 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
792 np = &tbl->phash_buckets[h];
793 while ((n = *np) != NULL) {
794 if (!dev || n->dev == dev) {
803 write_unlock_bh(&tbl->lock);
804 while ((n = freelist)) {
807 if (tbl->pdestructor)
816 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop one reference on @parms and destroy it on the last put. */
818 static inline void neigh_parms_put(struct neigh_parms *parms)
820 if (refcount_dec_and_test(&parms->refcnt))
821 neigh_parms_destroy(parms);
825 * neighbour must already be out of the table;
/* Final destruction once the last reference is gone: warns if the entry
 * is still alive or (impossibly) still has a timer pending, purges the
 * arp_queue under the lock, lets the driver clean up via
 * ndo_neigh_destroy, drops the parms reference, and frees the entry
 * after an RCU grace period (kfree_rcu).
 */
828 void neigh_destroy(struct neighbour *neigh)
830 struct net_device *dev = neigh->dev;
832 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
835 pr_warn("Destroying alive neighbour %p\n", neigh);
840 if (neigh_del_timer(neigh))
841 pr_warn("Impossible event\n");
843 write_lock_bh(&neigh->lock);
844 __skb_queue_purge(&neigh->arp_queue);
845 write_unlock_bh(&neigh->lock);
846 neigh->arp_queue_len_bytes = 0;
848 if (dev->netdev_ops->ndo_neigh_destroy)
849 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
852 neigh_parms_put(neigh->parms);
854 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
856 atomic_dec(&neigh->tbl->entries);
857 kfree_rcu(neigh, rcu);
859 EXPORT_SYMBOL(neigh_destroy);
861 /* Neighbour state is suspicious;
864 Called with write_locked neigh.
/* Downgrade to the generic (resolving) output path while reachability
 * is in doubt.
 */
866 static void neigh_suspect(struct neighbour *neigh)
868 neigh_dbg(2, "neigh %p is suspected\n", neigh);
870 neigh->output = neigh->ops->output;
873 /* Neighbour state is OK;
876 Called with write_locked neigh.
/* Switch to the fast connected output path now that the entry is known
 * reachable.
 */
878 static void neigh_connect(struct neighbour *neigh)
880 neigh_dbg(2, "neigh %p is connected\n", neigh);
882 neigh->output = neigh->ops->connected_output;
/* Deferred-work garbage collector.  Each pass: (1) at most once per
 * 300s, refresh every parms' reachable_time from neigh_rand_reach_time;
 * (2) if the table is above gc_thresh1, sweep all buckets dropping
 * unreferenced entries that are NUD_FAILED or unused for longer than
 * GC_STALETIME, skipping PERMANENT, in-timer and ext-learned entries;
 * (3) re-queue itself at BASE_REACHABLE_TIME/2.  The table lock is
 * dropped and re-taken between buckets so the sweep does not hog it
 * (see comment near the bottom).  Some loop-advance lines are elided
 * from this extract.
 */
885 static void neigh_periodic_work(struct work_struct *work)
887 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
889 struct neighbour __rcu **np;
891 struct neigh_hash_table *nht;
893 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
895 write_lock_bh(&tbl->lock);
896 nht = rcu_dereference_protected(tbl->nht,
897 lockdep_is_held(&tbl->lock));
900 * periodically recompute ReachableTime from random function
903 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
904 struct neigh_parms *p;
905 tbl->last_rand = jiffies;
906 list_for_each_entry(p, &tbl->parms_list, list)
908 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
911 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
914 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
915 np = &nht->hash_buckets[i];
917 while ((n = rcu_dereference_protected(*np,
918 lockdep_is_held(&tbl->lock))) != NULL) {
921 write_lock(&n->lock);
923 state = n->nud_state;
924 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
925 (n->flags & NTF_EXT_LEARNED)) {
926 write_unlock(&n->lock);
930 if (time_before(n->used, n->confirmed) &&
931 time_is_before_eq_jiffies(n->confirmed))
932 n->used = n->confirmed;
934 if (refcount_read(&n->refcnt) == 1 &&
935 (state == NUD_FAILED ||
936 !time_in_range_open(jiffies, n->used,
937 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
940 write_unlock(&n->lock);
941 neigh_cleanup_and_release(n);
944 write_unlock(&n->lock);
950 * It's fine to release lock here, even if hash table
951 * grows while we are preempted.
953 write_unlock_bh(&tbl->lock);
955 write_lock_bh(&tbl->lock);
956 nht = rcu_dereference_protected(tbl->nht,
957 lockdep_is_held(&tbl->lock));
960 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
961 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
962 * BASE_REACHABLE_TIME.
964 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
965 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
966 write_unlock_bh(&tbl->lock);
/* Total probe budget before giving up: unicast + application probes,
 * plus multicast probes (MCAST_REPROBES is used instead while the entry
 * is already in NUD_PROBE).
 */
969 static __inline__ int neigh_max_probes(struct neighbour *n)
971 struct neigh_parms *p = n->parms;
972 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
973 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
974 NEIGH_VAR(p, MCAST_PROBES));
/* Entry just entered NUD_FAILED: bump res_failed, report every queued
 * skb as unreachable via ops->error_report() -- dropping neigh->lock
 * around the callback because it may re-enter this neighbour entry (see
 * the comment below) -- and finally purge whatever remains on the
 * arp_queue.
 */
977 static void neigh_invalidate(struct neighbour *neigh)
978 __releases(neigh->lock)
979 __acquires(neigh->lock)
983 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
984 neigh_dbg(2, "neigh %p is failed\n", neigh);
985 neigh->updated = jiffies;
987 /* It is very thin place. report_unreachable is very complicated
988 routine. Particularly, it can hit the same neighbour entry!
990 So that, we try to be accurate and avoid dead loop. --ANK
992 while (neigh->nud_state == NUD_FAILED &&
993 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
994 write_unlock(&neigh->lock);
995 neigh->ops->error_report(neigh, skb);
996 write_lock(&neigh->lock);
998 __skb_queue_purge(&neigh->arp_queue);
999 neigh->arp_queue_len_bytes = 0;
/* Send one solicitation for @neigh, using a clone of the newest queued
 * skb (so the queue may still drop the original), and bump the probe
 * counter.  Drops neigh->lock before calling ops->solicit(), as the
 * __releases annotation records.
 */
1002 static void neigh_probe(struct neighbour *neigh)
1003 __releases(neigh->lock)
1005 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1006 /* keep skb alive even if arp_queue overflows */
1008 skb = skb_clone(skb, GFP_ATOMIC);
1009 write_unlock(&neigh->lock);
1010 if (neigh->ops->solicit)
1011 neigh->ops->solicit(neigh, skb);
1012 atomic_inc(&neigh->probes);
1016 /* Called when a timer expires for a neighbour entry. */
/* NUD state-machine driver, run under neigh->lock:
 *   REACHABLE -> stays (recently confirmed), else DELAY or STALE;
 *   DELAY     -> REACHABLE if confirmed within DELAY_PROBE_TIME, else
 *                PROBE with the probe counter reset;
 *   PROBE|INCOMPLETE -> retransmit at RETRANS_TIME until the budget from
 *                neigh_max_probes() is spent, then NUD_FAILED via
 *                neigh_invalidate().
 * The timer is re-armed no sooner than HZ/2 from now, and a probe is
 * issued when leaving in INCOMPLETE/PROBE.  Notification and the final
 * release happen after the lock is dropped.  Some branches and the
 * timer-refcount handling are elided from this extract.
 */
1018 static void neigh_timer_handler(struct timer_list *t)
1020 unsigned long now, next;
1021 struct neighbour *neigh = from_timer(neigh, t, timer);
1025 write_lock(&neigh->lock);
1027 state = neigh->nud_state;
1031 if (!(state & NUD_IN_TIMER))
1034 if (state & NUD_REACHABLE) {
1035 if (time_before_eq(now,
1036 neigh->confirmed + neigh->parms->reachable_time)) {
1037 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1038 next = neigh->confirmed + neigh->parms->reachable_time;
1039 } else if (time_before_eq(now,
1041 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1042 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1043 neigh->nud_state = NUD_DELAY;
1044 neigh->updated = jiffies;
1045 neigh_suspect(neigh);
1046 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1048 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1049 neigh->nud_state = NUD_STALE;
1050 neigh->updated = jiffies;
1051 neigh_suspect(neigh);
1054 } else if (state & NUD_DELAY) {
1055 if (time_before_eq(now,
1057 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1058 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1059 neigh->nud_state = NUD_REACHABLE;
1060 neigh->updated = jiffies;
1061 neigh_connect(neigh);
1063 next = neigh->confirmed + neigh->parms->reachable_time;
1065 neigh_dbg(2, "neigh %p is probed\n", neigh);
1066 neigh->nud_state = NUD_PROBE;
1067 neigh->updated = jiffies;
1068 atomic_set(&neigh->probes, 0);
1070 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1073 /* NUD_PROBE|NUD_INCOMPLETE */
1074 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
1077 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1078 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1079 neigh->nud_state = NUD_FAILED;
1081 neigh_invalidate(neigh);
1085 if (neigh->nud_state & NUD_IN_TIMER) {
1086 if (time_before(next, jiffies + HZ/2))
1087 next = jiffies + HZ/2;
1088 if (!mod_timer(&neigh->timer, next))
1091 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1095 write_unlock(&neigh->lock);
1099 neigh_update_notify(neigh, 0);
1101 trace_neigh_timer_handler(neigh, 0);
1103 neigh_release(neigh);
/* Resolution entry point used by the output path.  Fast-exits when the
 * entry is CONNECTED/DELAY/PROBE.  Otherwise: start INCOMPLETE probing
 * (or fail straight to NUD_FAILED when no mcast/app probes are
 * configured), promote STALE to DELAY, and for INCOMPLETE entries queue
 * @skb on the arp_queue -- evicting from the head once QUEUE_LEN_BYTES
 * (counted in skb->truesize) is exceeded, bumping unres_discards per
 * drop.  An immediate probe fires outside the lock when INCOMPLETE was
 * just entered.  Return-value plumbing and the dead-entry path are
 * partly elided from this extract.
 */
1106 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1109 bool immediate_probe = false;
1111 write_lock_bh(&neigh->lock);
1114 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1119 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1120 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1121 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1122 unsigned long next, now = jiffies;
1124 atomic_set(&neigh->probes,
1125 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1126 neigh_del_timer(neigh);
1127 neigh->nud_state = NUD_INCOMPLETE;
1128 neigh->updated = now;
1129 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1131 neigh_add_timer(neigh, next);
1132 immediate_probe = true;
1134 neigh->nud_state = NUD_FAILED;
1135 neigh->updated = jiffies;
1136 write_unlock_bh(&neigh->lock);
1141 } else if (neigh->nud_state & NUD_STALE) {
1142 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1143 neigh_del_timer(neigh);
1144 neigh->nud_state = NUD_DELAY;
1145 neigh->updated = jiffies;
1146 neigh_add_timer(neigh, jiffies +
1147 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1150 if (neigh->nud_state == NUD_INCOMPLETE) {
1152 while (neigh->arp_queue_len_bytes + skb->truesize >
1153 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1154 struct sk_buff *buff;
1156 buff = __skb_dequeue(&neigh->arp_queue);
1159 neigh->arp_queue_len_bytes -= buff->truesize;
1161 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1164 __skb_queue_tail(&neigh->arp_queue, skb);
1165 neigh->arp_queue_len_bytes += skb->truesize;
1170 if (immediate_probe)
1173 write_unlock(&neigh->lock);
1175 trace_neigh_event_send_done(neigh, rc);
1179 if (neigh->nud_state & NUD_STALE)
1181 write_unlock_bh(&neigh->lock);
1183 trace_neigh_event_send_dead(neigh, 1);
1186 EXPORT_SYMBOL(__neigh_event_send);
/* Propagate a freshly learned hardware address into the cached hard
 * header: when hh_len is nonzero, rewrite the cache under its seqlock
 * via the device's header_ops->cache_update callback.
 */
1188 static void neigh_update_hhs(struct neighbour *neigh)
1190 struct hh_cache *hh;
1191 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1194 if (neigh->dev->header_ops)
1195 update = neigh->dev->header_ops->cache_update;
1199 if (READ_ONCE(hh->hh_len)) {
1200 write_seqlock_bh(&hh->hh_lock);
1201 update(hh, neigh->dev, neigh->ha);
1202 write_sequnlock_bh(&hh->hh_lock);
1209 /* Generic update routine.
1210 -- lladdr is new lladdr or NULL, if it is not supplied.
1211 -- new is new state.
1213 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1215 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1216 lladdr instead of overriding it
1218 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1219 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1220 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1222 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1225 Caller MUST hold reference count on the entry.
/* Core implementation behind neigh_update(): validates the transition
 * (ADMIN required to touch NOARP/PERMANENT entries), reconciles the
 * supplied lladdr against the cached one under the OVERRIDE /
 * WEAK_OVERRIDE rules, drives the state machine (timers,
 * connect/suspect, hh-cache refresh), and flushes the arp_queue through
 * the now-valid entry -- re-looking up the top-level neigh per skb
 * because layered devices may route via a different neighbour (see the
 * inline comment).  Ends with gc-list maintenance and netlink
 * notification.  Numerous error/skip paths are elided from this extract.
 */
1228 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1229 u8 new, u32 flags, u32 nlmsg_pid,
1230 struct netlink_ext_ack *extack)
1232 bool ext_learn_change = false;
1236 struct net_device *dev;
1237 int update_isrouter = 0;
1239 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1241 write_lock_bh(&neigh->lock);
1244 old = neigh->nud_state;
1248 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1252 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1253 (old & (NUD_NOARP | NUD_PERMANENT)))
1256 ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
1257 if (flags & NEIGH_UPDATE_F_USE) {
1258 new = old & ~NUD_PERMANENT;
1259 neigh->nud_state = new;
1264 if (!(new & NUD_VALID)) {
1265 neigh_del_timer(neigh);
1266 if (old & NUD_CONNECTED)
1267 neigh_suspect(neigh);
1268 neigh->nud_state = new;
1270 notify = old & NUD_VALID;
1271 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1272 (new & NUD_FAILED)) {
1273 neigh_invalidate(neigh);
1279 /* Compare new lladdr with cached one */
1280 if (!dev->addr_len) {
1281 /* First case: device needs no address. */
1283 } else if (lladdr) {
1284 /* The second case: if something is already cached
1285 and a new address is proposed:
1287 - if they are different, check override flag
1289 if ((old & NUD_VALID) &&
1290 !memcmp(lladdr, neigh->ha, dev->addr_len))
1293 /* No address is supplied; if we know something,
1294 use it, otherwise discard the request.
1297 if (!(old & NUD_VALID)) {
1298 NL_SET_ERR_MSG(extack, "No link layer address given");
1304 /* Update confirmed timestamp for neighbour entry after we
1305 * received ARP packet even if it doesn't change IP to MAC binding.
1307 if (new & NUD_CONNECTED)
1308 neigh->confirmed = jiffies;
1310 /* If entry was valid and address is not changed,
1311 do not change entry state, if new one is STALE.
1314 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1315 if (old & NUD_VALID) {
1316 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1317 update_isrouter = 0;
1318 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1319 (old & NUD_CONNECTED)) {
1325 if (lladdr == neigh->ha && new == NUD_STALE &&
1326 !(flags & NEIGH_UPDATE_F_ADMIN))
1331 /* Update timestamp only once we know we will make a change to the
1332 * neighbour entry. Otherwise we risk to move the locktime window with
1333 * noop updates and ignore relevant ARP updates.
1335 if (new != old || lladdr != neigh->ha)
1336 neigh->updated = jiffies;
1339 neigh_del_timer(neigh);
1340 if (new & NUD_PROBE)
1341 atomic_set(&neigh->probes, 0);
1342 if (new & NUD_IN_TIMER)
1343 neigh_add_timer(neigh, (jiffies +
1344 ((new & NUD_REACHABLE) ?
1345 neigh->parms->reachable_time :
1347 neigh->nud_state = new;
1351 if (lladdr != neigh->ha) {
1352 write_seqlock(&neigh->ha_lock);
1353 memcpy(&neigh->ha, lladdr, dev->addr_len);
1354 write_sequnlock(&neigh->ha_lock);
1355 neigh_update_hhs(neigh);
1356 if (!(new & NUD_CONNECTED))
1357 neigh->confirmed = jiffies -
1358 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1363 if (new & NUD_CONNECTED)
1364 neigh_connect(neigh);
1366 neigh_suspect(neigh);
1367 if (!(old & NUD_VALID)) {
1368 struct sk_buff *skb;
1370 /* Again: avoid dead loop if something went wrong */
1372 while (neigh->nud_state & NUD_VALID &&
1373 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1374 struct dst_entry *dst = skb_dst(skb);
1375 struct neighbour *n2, *n1 = neigh;
1376 write_unlock_bh(&neigh->lock);
1380 /* Why not just use 'neigh' as-is? The problem is that
1381 * things such as shaper, eql, and sch_teql can end up
1382 * using alternative, different, neigh objects to output
1383 * the packet in the output path. So what we need to do
1384 * here is re-lookup the top-level neigh in the path so
1385 * we can reinject the packet there.
1388 if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1389 n2 = dst_neigh_lookup_skb(dst, skb);
1393 n1->output(n1, skb);
1398 write_lock_bh(&neigh->lock);
1400 __skb_queue_purge(&neigh->arp_queue);
1401 neigh->arp_queue_len_bytes = 0;
1404 if (update_isrouter)
1405 neigh_update_is_router(neigh, flags, &notify);
1406 write_unlock_bh(&neigh->lock);
1408 if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
1409 neigh_update_gc_list(neigh);
1412 neigh_update_notify(neigh, nlmsg_pid);
1414 trace_neigh_update_done(neigh, err);
/* Public wrapper around __neigh_update() with no extack. */
1419 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1420 u32 flags, u32 nlmsg_pid)
1422 return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1424 EXPORT_SYMBOL(neigh_update);
1426 /* Update the neigh to listen temporarily for probe responses, even if it is
1427 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
/* Moves a NUD_FAILED entry to NUD_INCOMPLETE with the probe counter set
 * to the maximum, so exactly one more RETRANS_TIME window is open for a
 * response before failing again.
 */
1429 void __neigh_set_probe_once(struct neighbour *neigh)
1433 neigh->updated = jiffies;
1434 if (!(neigh->nud_state & NUD_FAILED))
1436 neigh->nud_state = NUD_INCOMPLETE;
1437 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1438 neigh_add_timer(neigh,
1439 jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1441 EXPORT_SYMBOL(__neigh_set_probe_once);
/* Handle an incoming neighbour solicitation / ARP request from @saddr:
 * look up the entry (creating it when a link-layer address was supplied,
 * or when the device has a zero-length address) and mark it NUD_STALE
 * with the sender's lladdr, overriding any existing address.
 */
1443 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1444 u8 *lladdr, void *saddr,
1445 struct net_device *dev)
1447 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1448 lladdr || !dev->addr_len)
1450 neigh_update(neigh, lladdr, NUD_STALE,
1451 NEIGH_UPDATE_F_OVERRIDE, 0);
1454 EXPORT_SYMBOL(neigh_event_ns);
1456 /* called with read_lock_bh(&n->lock); */
/* Populate the cached hardware header (n->hh) via the device's
 * header_ops->cache() callback; n->lock is taken for writing so only
 * one thread performs the initialization.
 */
1457 static void neigh_hh_init(struct neighbour *n)
1459 struct net_device *dev = n->dev;
1460 __be16 prot = n->tbl->protocol;
1461 struct hh_cache *hh = &n->hh;
1463 write_lock_bh(&n->lock);
1465 /* Only one thread can come in here and initialize the
1469 dev->header_ops->cache(n, hh, prot);
1471 write_unlock_bh(&n->lock);
1474 /* Slow and careful. */
/* Output path used while the neighbour may still need resolving:
 * neigh_event_send() kicks/waits for resolution, the hh cache is
 * initialized on first use, and the hardware header is built under the
 * ha_lock seqlock so a concurrent lladdr change forces a retry.
 */
1476 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1480 if (!neigh_event_send(neigh, skb)) {
1482 struct net_device *dev = neigh->dev;
1485 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1486 neigh_hh_init(neigh);
1489 __skb_pull(skb, skb_network_offset(skb));
1490 seq = read_seqbegin(&neigh->ha_lock);
1491 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1492 neigh->ha, NULL, skb->len);
1493 } while (read_seqretry(&neigh->ha_lock, seq));
1496 rc = dev_queue_xmit(skb);
1507 EXPORT_SYMBOL(neigh_resolve_output);
1509 /* As fast as possible without hh cache */
/* Output path for NUD_CONNECTED neighbours: no resolution step, just
 * build the hardware header (retrying under the ha_lock seqlock if the
 * lladdr changes mid-copy) and transmit.
 */
1511 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1513 struct net_device *dev = neigh->dev;
1518 __skb_pull(skb, skb_network_offset(skb));
1519 seq = read_seqbegin(&neigh->ha_lock);
1520 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1521 neigh->ha, NULL, skb->len);
1522 } while (read_seqretry(&neigh->ha_lock, seq));
1525 err = dev_queue_xmit(skb);
1532 EXPORT_SYMBOL(neigh_connected_output);
/* Output path for devices that need no link-layer header: hand the skb
 * straight to the qdisc layer.
 */
1534 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1536 return dev_queue_xmit(skb);
1538 EXPORT_SYMBOL(neigh_direct_output);
/* Timer handler for the per-table proxy queue: replay skbs whose
 * scheduled delay has expired via tbl->proxy_redo(), and re-arm the
 * timer for the earliest not-yet-due entry.
 */
1540 static void neigh_proxy_process(struct timer_list *t)
1542 struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1543 long sched_next = 0;
1544 unsigned long now = jiffies;
1545 struct sk_buff *skb, *n;
1547 spin_lock(&tbl->proxy_queue.lock);
1549 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
/* tdif <= 0 means this skb's scheduled time has passed. */
1550 long tdif = NEIGH_CB(skb)->sched_next - now;
1553 struct net_device *dev = skb->dev;
1555 __skb_unlink(skb, &tbl->proxy_queue);
1556 if (tbl->proxy_redo && netif_running(dev)) {
1558 tbl->proxy_redo(skb);
/* Track the soonest remaining deadline for the re-arm below. */
1565 } else if (!sched_next || tdif < sched_next)
1568 del_timer(&tbl->proxy_timer);
1570 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1571 spin_unlock(&tbl->proxy_queue.lock);
/* Queue an skb for delayed proxy processing, jittered by a random delay
 * up to PROXY_DELAY; drops (elided path) when the queue exceeds
 * PROXY_QLEN. The proxy timer is re-armed to the earliest deadline.
 */
1574 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1575 struct sk_buff *skb)
1577 unsigned long now = jiffies;
/* Random delay in [0, PROXY_DELAY) to spread proxy replies. */
1579 unsigned long sched_next = now + (prandom_u32() %
1580 NEIGH_VAR(p, PROXY_DELAY));
1582 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1587 NEIGH_CB(skb)->sched_next = sched_next;
1588 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1590 spin_lock(&tbl->proxy_queue.lock);
/* Keep the earlier of the pending timer expiry and our deadline. */
1591 if (del_timer(&tbl->proxy_timer)) {
1592 if (time_before(tbl->proxy_timer.expires, sched_next))
1593 sched_next = tbl->proxy_timer.expires;
1597 __skb_queue_tail(&tbl->proxy_queue, skb);
1598 mod_timer(&tbl->proxy_timer, sched_next);
1599 spin_unlock(&tbl->proxy_queue.lock);
1601 EXPORT_SYMBOL(pneigh_enqueue);
/* Find the neigh_parms for (net, ifindex) on @tbl's parms list;
 * ifindex == 0 selects the table's default parms (init_net only).
 */
1603 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1604 struct net *net, int ifindex)
1606 struct neigh_parms *p;
1608 list_for_each_entry(p, &tbl->parms_list, list) {
1609 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1610 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* Allocate per-device neigh_parms for @dev by cloning the table's
 * defaults, giving the driver a chance to adjust them via
 * ndo_neigh_setup(), then link them onto tbl->parms_list.
 * Returns the new parms (error/NULL paths elided in this view).
 */
1617 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1618 struct neigh_table *tbl)
1620 struct neigh_parms *p;
1621 struct net *net = dev_net(dev);
1622 const struct net_device_ops *ops = dev->netdev_ops;
/* Start from a copy of the table-wide default parameters. */
1624 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1627 refcount_set(&p->refcnt, 1);
1629 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1632 write_pnet(&p->net, net);
1633 p->sysctl_table = NULL;
/* Let the driver veto/adjust; failure path is elided here. */
1635 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1641 write_lock_bh(&tbl->lock);
1642 list_add(&p->list, &tbl->parms.list);
1643 write_unlock_bh(&tbl->lock);
1645 neigh_parms_data_state_cleanall(p);
1649 EXPORT_SYMBOL(neigh_parms_alloc);
/* RCU callback: drop the final reference taken on behalf of readers
 * once a grace period has elapsed after neigh_parms_release().
 */
1651 static void neigh_rcu_free_parms(struct rcu_head *head)
1653 struct neigh_parms *parms =
1654 container_of(head, struct neigh_parms, rcu_head);
1656 neigh_parms_put(parms);
/* Unlink @parms from the table and schedule its release after an RCU
 * grace period; the table's built-in default parms are never released.
 */
1659 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1661 if (!parms || parms == &tbl->parms)
1663 write_lock_bh(&tbl->lock);
1664 list_del(&parms->list);
1666 write_unlock_bh(&tbl->lock);
1668 dev_put(parms->dev);
1669 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1671 EXPORT_SYMBOL(neigh_parms_release);
1673 static void neigh_parms_destroy(struct neigh_parms *parms)
/* Lockdep class for per-table proxy queues (see neigh_table_init()). */
1678 static struct lock_class_key neigh_table_proxy_queue_class;
/* Registry of the active tables, indexed by NEIGH_*_TABLE. */
1680 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
/* One-time initialization of a neighbour table: default parms, percpu
 * stats, /proc entry, neighbour and proxy hash buckets, the periodic GC
 * work, and the proxy timer. Panics on allocation failure since this
 * runs at protocol-registration time. Registers the table at @index.
 */
1682 void neigh_table_init(int index, struct neigh_table *tbl)
1684 unsigned long now = jiffies;
1685 unsigned long phsize;
1687 INIT_LIST_HEAD(&tbl->parms_list);
1688 INIT_LIST_HEAD(&tbl->gc_list);
1689 list_add(&tbl->parms.list, &tbl->parms_list);
1690 write_pnet(&tbl->parms.net, &init_net);
1691 refcount_set(&tbl->parms.refcnt, 1);
/* Jitter reachable_time around BASE_REACHABLE_TIME (RFC-style). */
1692 tbl->parms.reachable_time =
1693 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1695 tbl->stats = alloc_percpu(struct neigh_statistics);
1697 panic("cannot create neighbour cache statistics");
1699 #ifdef CONFIG_PROC_FS
1700 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1701 &neigh_stat_seq_ops, tbl))
1702 panic("cannot create neighbour proc dir entry");
/* Start with a 2^3-bucket neighbour hash; it grows on demand. */
1705 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1707 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1708 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1710 if (!tbl->nht || !tbl->phash_buckets)
1711 panic("cannot allocate neighbour cache hashes");
/* Entry size covers the header plus the protocol key, aligned. */
1713 if (!tbl->entry_size)
1714 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1715 tbl->key_len, NEIGH_PRIV_ALIGN);
1717 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1719 rwlock_init(&tbl->lock);
1720 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1721 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1722 tbl->parms.reachable_time);
1723 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1724 skb_queue_head_init_class(&tbl->proxy_queue,
1725 &neigh_table_proxy_queue_class);
1727 tbl->last_flush = now;
1728 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1730 neigh_tables[index] = tbl;
1732 EXPORT_SYMBOL(neigh_table_init);
/* Tear down a neighbour table on protocol unregistration: stop GC work
 * and the proxy timer, flush queued/ cached entries, free the hashes,
 * the /proc entry and the percpu stats. Warns if entries leaked.
 */
1734 int neigh_table_clear(int index, struct neigh_table *tbl)
1736 neigh_tables[index] = NULL;
1737 /* It is not clean... Fix it to unload IPv6 module safely */
1738 cancel_delayed_work_sync(&tbl->gc_work);
1739 del_timer_sync(&tbl->proxy_timer);
1740 pneigh_queue_purge(&tbl->proxy_queue, NULL);
1741 neigh_ifdown(tbl, NULL);
1742 if (atomic_read(&tbl->entries))
1743 pr_crit("neighbour leakage\n");
/* Free the neighbour hash after an RCU grace period. */
1745 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1746 neigh_hash_free_rcu);
1749 kfree(tbl->phash_buckets);
1750 tbl->phash_buckets = NULL;
1752 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1754 free_percpu(tbl->stats);
1759 EXPORT_SYMBOL(neigh_table_clear);
/* Map an address family to its neighbour table (ARP for IPv4, ND for
 * IPv6); returns NULL for unsupported families.
 */
1761 static struct neigh_table *neigh_find_table(int family)
1763 struct neigh_table *tbl = NULL;
1767 tbl = neigh_tables[NEIGH_ARP_TABLE];
1770 tbl = neigh_tables[NEIGH_ND_TABLE];
/* Netlink attribute validation policy for RTM_*NEIGH messages. */
1777 const struct nla_policy nda_policy[NDA_MAX+1] = {
1778 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1779 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1780 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1781 [NDA_PROBES] = { .type = NLA_U32 },
1782 [NDA_VLAN] = { .type = NLA_U16 },
1783 [NDA_PORT] = { .type = NLA_U16 },
1784 [NDA_VNI] = { .type = NLA_U32 },
1785 [NDA_IFINDEX] = { .type = NLA_U32 },
1786 [NDA_MASTER] = { .type = NLA_U32 },
1787 [NDA_PROTOCOL] = { .type = NLA_U8 },
/* RTM_DELNEIGH handler: validate the request, resolve table and device,
 * then either remove a proxy entry (NTF_PROXY) or force the neighbour
 * to NUD_FAILED and unlink it from the hash.
 */
1790 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1791 struct netlink_ext_ack *extack)
1793 struct net *net = sock_net(skb->sk);
1795 struct nlattr *dst_attr;
1796 struct neigh_table *tbl;
1797 struct neighbour *neigh;
1798 struct net_device *dev = NULL;
1802 if (nlmsg_len(nlh) < sizeof(*ndm))
1805 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1807 NL_SET_ERR_MSG(extack, "Network address not specified");
1811 ndm = nlmsg_data(nlh);
1812 if (ndm->ndm_ifindex) {
1813 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1820 tbl = neigh_find_table(ndm->ndm_family);
1822 return -EAFNOSUPPORT;
1824 if (nla_len(dst_attr) < (int)tbl->key_len) {
1825 NL_SET_ERR_MSG(extack, "Invalid network address");
/* Proxy entries live in a separate hash; handle and bail out. */
1829 if (ndm->ndm_flags & NTF_PROXY) {
1830 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1837 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1838 if (neigh == NULL) {
/* Admin override: push the entry to NUD_FAILED, then unlink. */
1843 err = __neigh_update(neigh, NULL, NUD_FAILED,
1844 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1845 NETLINK_CB(skb).portid, extack);
1846 write_lock_bh(&tbl->lock);
1847 neigh_release(neigh);
1848 neigh_remove_one(neigh, tbl);
1849 write_unlock_bh(&tbl->lock);
/* RTM_NEWNEIGH handler: validate attributes, resolve table/device, then
 * create or update a proxy entry (NTF_PROXY) or a regular neighbour,
 * honouring NLM_F_CREATE / NLM_F_EXCL / NLM_F_REPLACE semantics.
 */
1855 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1856 struct netlink_ext_ack *extack)
1858 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1859 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1860 struct net *net = sock_net(skb->sk);
1862 struct nlattr *tb[NDA_MAX+1];
1863 struct neigh_table *tbl;
1864 struct net_device *dev = NULL;
1865 struct neighbour *neigh;
1871 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1872 nda_policy, extack);
1878 NL_SET_ERR_MSG(extack, "Network address not specified");
1882 ndm = nlmsg_data(nlh);
1883 if (ndm->ndm_ifindex) {
1884 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1890 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1891 NL_SET_ERR_MSG(extack, "Invalid link address");
1896 tbl = neigh_find_table(ndm->ndm_family);
1898 return -EAFNOSUPPORT;
1900 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1901 NL_SET_ERR_MSG(extack, "Invalid network address");
1905 dst = nla_data(tb[NDA_DST]);
1906 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1908 if (tb[NDA_PROTOCOL])
1909 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
/* Proxy path: create/refresh the pneigh entry and return. */
1911 if (ndm->ndm_flags & NTF_PROXY) {
1912 struct pneigh_entry *pn;
1915 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1917 pn->flags = ndm->ndm_flags;
1919 pn->protocol = protocol;
1926 NL_SET_ERR_MSG(extack, "Device not specified");
1930 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
1935 neigh = neigh_lookup(tbl, dst, dev);
1936 if (neigh == NULL) {
1937 bool exempt_from_gc;
1939 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
/* PERMANENT and externally-learned entries escape GC pressure. */
1944 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1945 ndm->ndm_flags & NTF_EXT_LEARNED;
1946 neigh = ___neigh_create(tbl, dst, dev,
1947 ndm->ndm_flags & NTF_EXT_LEARNED,
1948 exempt_from_gc, true);
1949 if (IS_ERR(neigh)) {
1950 err = PTR_ERR(neigh);
1954 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1956 neigh_release(neigh);
/* Without REPLACE, an existing entry keeps its current lladdr. */
1960 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1961 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1962 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1966 neigh->protocol = protocol;
1967 if (ndm->ndm_flags & NTF_EXT_LEARNED)
1968 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1969 if (ndm->ndm_flags & NTF_ROUTER)
1970 flags |= NEIGH_UPDATE_F_ISROUTER;
1971 if (ndm->ndm_flags & NTF_USE)
1972 flags |= NEIGH_UPDATE_F_USE;
1974 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1975 NETLINK_CB(skb).portid, extack);
1976 if (!err && ndm->ndm_flags & NTF_USE) {
1977 neigh_event_send(neigh, NULL);
1980 neigh_release(neigh);
/* Emit one NDTA_PARMS nested attribute describing @parms (ifindex,
 * refcount, queue limits, probe counts and all timer values). Returns
 * the nest length on success or a negative error on overflow.
 */
1985 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1987 struct nlattr *nest;
1989 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
1994 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1995 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1996 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1997 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1998 /* approximative value for deprecated QUEUE_LEN (in packets) */
1999 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2000 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2001 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2002 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2003 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2004 NEIGH_VAR(parms, UCAST_PROBES)) ||
2005 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2006 NEIGH_VAR(parms, MCAST_PROBES)) ||
2007 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2008 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2009 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2011 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2012 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2013 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2014 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2015 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2016 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2017 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2018 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2019 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2020 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2021 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2022 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2023 nla_put_msecs(skb, NDTPA_LOCKTIME,
2024 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
2025 goto nla_put_failure;
2026 return nla_nest_end(skb, nest);
/* Roll back the partially-built nest on skb overflow. */
2029 nla_nest_cancel(skb, nest);
/* Build one RTM_NEWNEIGHTBL message for @tbl: name and GC thresholds,
 * NDTA_CONFIG (hash/queue state, read under rcu_read_lock_bh),
 * aggregated per-cpu NDTA_STATS, and the default NDTA_PARMS nest.
 * tbl->lock is read-held across the fill for a consistent snapshot.
 */
2033 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2034 u32 pid, u32 seq, int type, int flags)
2036 struct nlmsghdr *nlh;
2037 struct ndtmsg *ndtmsg;
2039 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2043 ndtmsg = nlmsg_data(nlh);
2045 read_lock_bh(&tbl->lock);
2046 ndtmsg->ndtm_family = tbl->family;
2047 ndtmsg->ndtm_pad1 = 0;
2048 ndtmsg->ndtm_pad2 = 0;
2050 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2051 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2052 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2053 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2054 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2055 goto nla_put_failure;
2057 unsigned long now = jiffies;
2058 long flush_delta = now - tbl->last_flush;
2059 long rand_delta = now - tbl->last_rand;
2060 struct neigh_hash_table *nht;
2061 struct ndt_config ndc = {
2062 .ndtc_key_len = tbl->key_len,
2063 .ndtc_entry_size = tbl->entry_size,
2064 .ndtc_entries = atomic_read(&tbl->entries),
2065 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
2066 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
2067 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* Hash parameters come from the RCU-protected hash table. */
2071 nht = rcu_dereference_bh(tbl->nht);
2072 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2073 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2074 rcu_read_unlock_bh();
2076 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2077 goto nla_put_failure;
2082 struct ndt_stats ndst;
2084 memset(&ndst, 0, sizeof(ndst));
/* Sum the per-cpu counters into one ndt_stats blob. */
2086 for_each_possible_cpu(cpu) {
2087 struct neigh_statistics *st;
2089 st = per_cpu_ptr(tbl->stats, cpu);
2090 ndst.ndts_allocs += st->allocs;
2091 ndst.ndts_destroys += st->destroys;
2092 ndst.ndts_hash_grows += st->hash_grows;
2093 ndst.ndts_res_failed += st->res_failed;
2094 ndst.ndts_lookups += st->lookups;
2095 ndst.ndts_hits += st->hits;
2096 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
2097 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
2098 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
2099 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
2100 ndst.ndts_table_fulls += st->table_fulls;
2103 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2105 goto nla_put_failure;
2108 BUG_ON(tbl->parms.dev);
2109 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2110 goto nla_put_failure;
2112 read_unlock_bh(&tbl->lock);
2113 nlmsg_end(skb, nlh);
2117 read_unlock_bh(&tbl->lock);
2118 nlmsg_cancel(skb, nlh);
/* Build an RTM_NEWNEIGHTBL message carrying only the table name plus a
 * single per-device NDTA_PARMS nest (used when dumping non-default
 * parms). Mirrors neightbl_fill_info() locking.
 */
2122 static int neightbl_fill_param_info(struct sk_buff *skb,
2123 struct neigh_table *tbl,
2124 struct neigh_parms *parms,
2125 u32 pid, u32 seq, int type,
2128 struct ndtmsg *ndtmsg;
2129 struct nlmsghdr *nlh;
2131 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2135 ndtmsg = nlmsg_data(nlh);
2137 read_lock_bh(&tbl->lock);
2138 ndtmsg->ndtm_family = tbl->family;
2139 ndtmsg->ndtm_pad1 = 0;
2140 ndtmsg->ndtm_pad2 = 0;
2142 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2143 neightbl_fill_parms(skb, parms) < 0)
2146 read_unlock_bh(&tbl->lock);
2147 nlmsg_end(skb, nlh);
2150 read_unlock_bh(&tbl->lock);
2151 nlmsg_cancel(skb, nlh);
/* Validation policy for RTM_SETNEIGHTBL top-level attributes. */
2155 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2156 [NDTA_NAME] = { .type = NLA_STRING },
2157 [NDTA_THRESH1] = { .type = NLA_U32 },
2158 [NDTA_THRESH2] = { .type = NLA_U32 },
2159 [NDTA_THRESH3] = { .type = NLA_U32 },
2160 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
2161 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Validation policy for the attributes nested inside NDTA_PARMS. */
2164 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2165 [NDTPA_IFINDEX] = { .type = NLA_U32 },
2166 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
2167 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
2168 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
2169 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
2170 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
2171 [NDTPA_MCAST_REPROBES] = { .type = NLA_U32 },
2172 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
2173 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
2174 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
2175 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
2176 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
2177 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
2178 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* RTM_SETNEIGHTBL handler: locate the table by NDTA_NAME (and family),
 * then apply any per-device NDTPA_* parameter updates and, for init_net
 * only, the table-wide GC thresholds/interval — all under tbl->lock so
 * the periodic timers observe a consistent set of values.
 */
2181 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2182 struct netlink_ext_ack *extack)
2184 struct net *net = sock_net(skb->sk);
2185 struct neigh_table *tbl;
2186 struct ndtmsg *ndtmsg;
2187 struct nlattr *tb[NDTA_MAX+1];
2191 err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2192 nl_neightbl_policy, extack);
2196 if (tb[NDTA_NAME] == NULL) {
2201 ndtmsg = nlmsg_data(nlh);
/* Find the table whose id matches NDTA_NAME (and family, if given). */
2203 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2204 tbl = neigh_tables[tidx];
2207 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2209 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2219 * We acquire tbl->lock to be nice to the periodic timers and
2220 * make sure they always see a consistent set of values.
2222 write_lock_bh(&tbl->lock);
2224 if (tb[NDTA_PARMS]) {
2225 struct nlattr *tbp[NDTPA_MAX+1];
2226 struct neigh_parms *p;
2229 err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2231 nl_ntbl_parm_policy, extack);
2233 goto errout_tbl_lock;
2235 if (tbp[NDTPA_IFINDEX])
2236 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2238 p = lookup_neigh_parms(tbl, net, ifindex);
2241 goto errout_tbl_lock;
/* Apply each supplied NDTPA_* attribute to the parms. */
2244 for (i = 1; i <= NDTPA_MAX; i++) {
2249 case NDTPA_QUEUE_LEN:
/* Deprecated packet count: convert to bytes. */
2250 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2251 nla_get_u32(tbp[i]) *
2252 SKB_TRUESIZE(ETH_FRAME_LEN));
2254 case NDTPA_QUEUE_LENBYTES:
2255 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2256 nla_get_u32(tbp[i]));
2258 case NDTPA_PROXY_QLEN:
2259 NEIGH_VAR_SET(p, PROXY_QLEN,
2260 nla_get_u32(tbp[i]));
2262 case NDTPA_APP_PROBES:
2263 NEIGH_VAR_SET(p, APP_PROBES,
2264 nla_get_u32(tbp[i]));
2266 case NDTPA_UCAST_PROBES:
2267 NEIGH_VAR_SET(p, UCAST_PROBES,
2268 nla_get_u32(tbp[i]));
2270 case NDTPA_MCAST_PROBES:
2271 NEIGH_VAR_SET(p, MCAST_PROBES,
2272 nla_get_u32(tbp[i]));
2274 case NDTPA_MCAST_REPROBES:
2275 NEIGH_VAR_SET(p, MCAST_REPROBES,
2276 nla_get_u32(tbp[i]));
2278 case NDTPA_BASE_REACHABLE_TIME:
2279 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2280 nla_get_msecs(tbp[i]));
2281 /* update reachable_time as well, otherwise, the change will
2282 * only be effective after the next time neigh_periodic_work
2283 * decides to recompute it (can be multiple minutes)
2286 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2288 case NDTPA_GC_STALETIME:
2289 NEIGH_VAR_SET(p, GC_STALETIME,
2290 nla_get_msecs(tbp[i]));
2292 case NDTPA_DELAY_PROBE_TIME:
2293 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2294 nla_get_msecs(tbp[i]));
/* Drivers (e.g. switchdev offload) care about this change. */
2295 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2297 case NDTPA_RETRANS_TIME:
2298 NEIGH_VAR_SET(p, RETRANS_TIME,
2299 nla_get_msecs(tbp[i]));
2301 case NDTPA_ANYCAST_DELAY:
2302 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2303 nla_get_msecs(tbp[i]));
2305 case NDTPA_PROXY_DELAY:
2306 NEIGH_VAR_SET(p, PROXY_DELAY,
2307 nla_get_msecs(tbp[i]));
2309 case NDTPA_LOCKTIME:
2310 NEIGH_VAR_SET(p, LOCKTIME,
2311 nla_get_msecs(tbp[i]));
/* Table-wide knobs are only settable from the initial netns. */
2318 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2319 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2320 !net_eq(net, &init_net))
2321 goto errout_tbl_lock;
2323 if (tb[NDTA_THRESH1])
2324 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2326 if (tb[NDTA_THRESH2])
2327 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2329 if (tb[NDTA_THRESH3])
2330 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2332 if (tb[NDTA_GC_INTERVAL])
2333 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2338 write_unlock_bh(&tbl->lock);
/* Strict-mode validation of an RTM_GETNEIGHTBL dump request header:
 * the ndtmsg must be complete, padding zero, and no trailing attributes.
 */
2343 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2344 struct netlink_ext_ack *extack)
2346 struct ndtmsg *ndtm;
2348 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2349 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2353 ndtm = nlmsg_data(nlh);
2354 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2355 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2359 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2360 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
/* Dump all neighbour tables (and their per-device parms) to netlink,
 * resuming from cb->args[0]/args[1] across multiple dump callbacks and
 * optionally filtered by address family.
 */
2367 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2369 const struct nlmsghdr *nlh = cb->nlh;
2370 struct net *net = sock_net(skb->sk);
2371 int family, tidx, nidx = 0;
2372 int tbl_skip = cb->args[0];
2373 int neigh_skip = cb->args[1];
2374 struct neigh_table *tbl;
2376 if (cb->strict_check) {
2377 int err = neightbl_valid_dump_info(nlh, cb->extack);
2383 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2385 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2386 struct neigh_parms *p;
2388 tbl = neigh_tables[tidx];
2392 if (tidx < tbl_skip || (family && tbl->family != family))
2395 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2396 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* Skip the default parms (already in the table message) and walk
 * the per-device ones visible in this netns.
 */
2401 p = list_next_entry(&tbl->parms, list);
2402 list_for_each_entry_from(p, &tbl->parms_list, list) {
2403 if (!net_eq(neigh_parms_net(p), net))
2406 if (nidx < neigh_skip)
2409 if (neightbl_fill_param_info(skb, tbl, p,
2410 NETLINK_CB(cb->skb).portid,
/* Serialize one neighbour entry into an RTM_NEWNEIGH message: ndmsg
 * header, NDA_DST key, NDA_LLADDR snapshot (only when NUD_VALID),
 * cache timing info, probe count and optional protocol. The volatile
 * fields are read under neigh->lock.
 */
2428 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2429 u32 pid, u32 seq, int type, unsigned int flags)
2431 unsigned long now = jiffies;
2432 struct nda_cacheinfo ci;
2433 struct nlmsghdr *nlh;
2436 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2440 ndm = nlmsg_data(nlh);
2441 ndm->ndm_family = neigh->ops->family;
2444 ndm->ndm_flags = neigh->flags;
2445 ndm->ndm_type = neigh->type;
2446 ndm->ndm_ifindex = neigh->dev->ifindex;
2448 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2449 goto nla_put_failure;
2451 read_lock_bh(&neigh->lock);
2452 ndm->ndm_state = neigh->nud_state;
2453 if (neigh->nud_state & NUD_VALID) {
2454 char haddr[MAX_ADDR_LEN];
/* Snapshot the lladdr consistently (ha_lock) before emitting. */
2456 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2457 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2458 read_unlock_bh(&neigh->lock);
2459 goto nla_put_failure;
2463 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2464 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2465 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
/* Exclude the reference this dump itself holds. */
2466 ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
2467 read_unlock_bh(&neigh->lock);
2469 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2470 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2471 goto nla_put_failure;
2473 if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2474 goto nla_put_failure;
2476 nlmsg_end(skb, nlh);
2480 nlmsg_cancel(skb, nlh);
/* Serialize one proxy entry into an RTM_NEWNEIGH message; proxy entries
 * always carry NTF_PROXY, state NUD_NONE and no lladdr.
 */
2484 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2485 u32 pid, u32 seq, int type, unsigned int flags,
2486 struct neigh_table *tbl)
2488 struct nlmsghdr *nlh;
2491 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2495 ndm = nlmsg_data(nlh);
2496 ndm->ndm_family = tbl->family;
2499 ndm->ndm_flags = pn->flags | NTF_PROXY;
2500 ndm->ndm_type = RTN_UNICAST;
2501 ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2502 ndm->ndm_state = NUD_NONE;
2504 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2505 goto nla_put_failure;
2507 if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2508 goto nla_put_failure;
2510 nlmsg_end(skb, nlh);
2514 nlmsg_cancel(skb, nlh);
/* Broadcast a neighbour change: netevent chain for in-kernel listeners
 * plus an RTM_NEWNEIGH netlink notification for userspace.
 */
2518 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2520 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2521 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
/* Dump filter: true when @dev should be skipped because its master
 * device does not match the requested @master_idx.
 */
2524 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2526 struct net_device *master;
2531 master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2532 if (!master || master->ifindex != master_idx)
/* Dump filter: true when @dev should be skipped because it does not
 * match the requested @filter_idx (0 disables the filter).
 */
2538 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2540 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2546 struct neigh_dump_filter {
/* Dump the neighbour entries of @tbl, walking the RCU hash under
 * rcu_read_lock_bh and resuming from cb->args[1]/args[2]; entries are
 * filtered by netns, device and master ifindex.
 */
2551 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2552 struct netlink_callback *cb,
2553 struct neigh_dump_filter *filter)
2555 struct net *net = sock_net(skb->sk);
2556 struct neighbour *n;
2557 int rc, h, s_h = cb->args[1];
2558 int idx, s_idx = idx = cb->args[2];
2559 struct neigh_hash_table *nht;
2560 unsigned int flags = NLM_F_MULTI;
2562 if (filter->dev_idx || filter->master_idx)
2563 flags |= NLM_F_DUMP_FILTERED;
2566 nht = rcu_dereference_bh(tbl->nht);
2568 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2571 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2573 n = rcu_dereference_bh(n->next)) {
2574 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2576 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2577 neigh_master_filtered(n->dev, filter->master_idx))
2579 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2592 rcu_read_unlock_bh();
/* Dump the proxy entries of @tbl under tbl->lock, resuming from
 * cb->args[3]/args[4]; filtered like neigh_dump_table().
 */
2598 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2599 struct netlink_callback *cb,
2600 struct neigh_dump_filter *filter)
2602 struct pneigh_entry *n;
2603 struct net *net = sock_net(skb->sk);
2604 int rc, h, s_h = cb->args[3];
2605 int idx, s_idx = idx = cb->args[4];
2606 unsigned int flags = NLM_F_MULTI;
2608 if (filter->dev_idx || filter->master_idx)
2609 flags |= NLM_F_DUMP_FILTERED;
2611 read_lock_bh(&tbl->lock);
2613 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2616 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2617 if (idx < s_idx || pneigh_net(n) != net)
2619 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2620 neigh_master_filtered(n->dev, filter->master_idx))
2622 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2624 RTM_NEWNEIGH, flags, tbl) < 0) {
2625 read_unlock_bh(&tbl->lock);
2634 read_unlock_bh(&tbl->lock);
/* Validate an RTM_GETNEIGH dump request and extract the dev/master
 * filters; strict mode rejects non-zero header fields, unknown flags
 * and unexpected attributes.
 */
2643 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2645 struct neigh_dump_filter *filter,
2646 struct netlink_ext_ack *extack)
2648 struct nlattr *tb[NDA_MAX + 1];
2654 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2655 NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2659 ndm = nlmsg_data(nlh);
2660 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2661 ndm->ndm_state || ndm->ndm_type) {
2662 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2666 if (ndm->ndm_flags & ~NTF_PROXY) {
2667 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
/* Strict requests use the strict parser; legacy ones stay lenient. */
2671 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2672 tb, NDA_MAX, nda_policy,
2675 err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2676 NDA_MAX, nda_policy, extack);
2681 for (i = 0; i <= NDA_MAX; ++i) {
2685 /* all new attributes should require strict_check */
2688 filter->dev_idx = nla_get_u32(tb[i]);
2691 filter->master_idx = nla_get_u32(tb[i]);
2695 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
/* Top-level RTM_GETNEIGH dump: choose proxy vs regular dump (NTF_PROXY
 * in the request header selects the proxy hash), then iterate the
 * tables resuming from cb->args[0], filtered by family.
 */
2704 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2706 const struct nlmsghdr *nlh = cb->nlh;
2707 struct neigh_dump_filter filter = {};
2708 struct neigh_table *tbl;
2713 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2715 /* check for full ndmsg structure presence, family member is
2716 * the same for both structures
2718 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2719 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2722 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2723 if (err < 0 && cb->strict_check)
2728 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2729 tbl = neigh_tables[t];
2733 if (t < s_t || (family && tbl->family != family))
/* Reset the per-table resume state when moving to a new table. */
2736 memset(&cb->args[1], 0, sizeof(cb->args) -
2737 sizeof(cb->args[0]));
2739 err = pneigh_dump_table(tbl, skb, cb, &filter);
2741 err = neigh_dump_table(tbl, skb, cb, &filter);
/* Validate an RTM_GETNEIGH (doit) request and extract the table,
 * destination key, ifindex and flags; only NDA_DST (matching the
 * table's key length) is accepted as an attribute.
 */
2750 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2751 struct neigh_table **tbl,
2752 void **dst, int *dev_idx, u8 *ndm_flags,
2753 struct netlink_ext_ack *extack)
2755 struct nlattr *tb[NDA_MAX + 1];
2759 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2760 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2764 ndm = nlmsg_data(nlh);
2765 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2767 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2771 if (ndm->ndm_flags & ~NTF_PROXY) {
2772 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2776 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2777 NDA_MAX, nda_policy, extack);
2781 *ndm_flags = ndm->ndm_flags;
2782 *dev_idx = ndm->ndm_ifindex;
2783 *tbl = neigh_find_table(ndm->ndm_family);
2785 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2786 return -EAFNOSUPPORT;
2789 for (i = 0; i <= NDA_MAX; ++i) {
2795 if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2796 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2799 *dst = nla_data(tb[i]);
2802 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
/* Worst-case payload size of one RTM_NEWNEIGH message for a regular
 * neighbour entry (used to size reply skbs).
 */
2810 static inline size_t neigh_nlmsg_size(void)
2812 return NLMSG_ALIGN(sizeof(struct ndmsg))
2813 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2814 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2815 + nla_total_size(sizeof(struct nda_cacheinfo))
2816 + nla_total_size(4) /* NDA_PROBES */
2817 + nla_total_size(1); /* NDA_PROTOCOL */
/* Allocate a reply skb, fill it with @neigh's RTM_NEWNEIGH message and
 * unicast it to the requesting socket.
 */
2820 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2823 struct sk_buff *skb;
2826 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2830 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2836 err = rtnl_unicast(skb, net, pid);
/* Worst-case payload size of one RTM_NEWNEIGH message for a proxy
 * entry (no lladdr or cacheinfo).
 */
2841 static inline size_t pneigh_nlmsg_size(void)
2843 return NLMSG_ALIGN(sizeof(struct ndmsg))
2844 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2845 + nla_total_size(1); /* NDA_PROTOCOL */
/* Allocate a reply skb, fill it with the proxy entry's RTM_NEWNEIGH
 * message and unicast it to the requesting socket.
 */
2848 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2849 u32 pid, u32 seq, struct neigh_table *tbl)
2851 struct sk_buff *skb;
2854 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2858 err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2864 err = rtnl_unicast(skb, net, pid);
/* RTM_GETNEIGH (doit) handler: validate the request, then look up and
 * reply with either a proxy entry (NTF_PROXY) or a regular neighbour.
 */
2869 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2870 struct netlink_ext_ack *extack)
2872 struct net *net = sock_net(in_skb->sk);
2873 struct net_device *dev = NULL;
2874 struct neigh_table *tbl = NULL;
2875 struct neighbour *neigh;
2881 err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2887 dev = __dev_get_by_index(net, dev_idx);
2889 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2895 NL_SET_ERR_MSG(extack, "Network address not specified");
2899 if (ndm_flags & NTF_PROXY) {
2900 struct pneigh_entry *pn;
/* Lookup only — don't create a proxy entry on a get request. */
2902 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2904 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2907 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2908 nlh->nlmsg_seq, tbl);
2912 NL_SET_ERR_MSG(extack, "No device specified");
2916 neigh = neigh_lookup(tbl, dst, dev);
2918 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2922 err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2925 neigh_release(neigh);
/* Invoke cb(n, cookie) for every neighbour in the table.  The hash
 * table pointer is fetched under RCU-bh and tbl->lock is read-held to
 * keep the table from being resized during the walk.
 */
2930 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2933 struct neigh_hash_table *nht;
2936 nht = rcu_dereference_bh(tbl->nht);
2938 read_lock(&tbl->lock); /* avoid resizes */
2939 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2940 struct neighbour *n;
2942 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2944 n = rcu_dereference_bh(n->next))
2947 read_unlock(&tbl->lock);
2948 rcu_read_unlock_bh();
2952 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain; for each neighbour, call cb(n) and — per the
 * conditional logic partially missing from this extraction — unlink
 * released entries from the chain and hand them to
 * neigh_cleanup_and_release().  rcu_dereference_protected() is correct
 * here because the caller holds tbl->lock as writer.
 */
2953 void __neigh_for_each_release(struct neigh_table *tbl,
2954 int (*cb)(struct neighbour *))
2957 struct neigh_hash_table *nht;
2959 nht = rcu_dereference_protected(tbl->nht,
2960 lockdep_is_held(&tbl->lock));
2961 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2962 struct neighbour *n;
2963 struct neighbour __rcu **np;
2965 np = &nht->hash_buckets[chain];
2966 while ((n = rcu_dereference_protected(*np,
2967 lockdep_is_held(&tbl->lock))) != NULL) {
/* per-entry lock taken while splicing it out of the RCU chain */
2970 write_lock(&n->lock);
2973 rcu_assign_pointer(*np,
2974 rcu_dereference_protected(n->next,
2975 lockdep_is_held(&tbl->lock)));
2979 write_unlock(&n->lock);
2981 neigh_cleanup_and_release(n);
/* Transmit skb via the neighbour table selected by @index.
 * For real neighbour tables: look up (fast-path for ARP) or create the
 * neighbour under rcu_read_lock_bh() and hand the skb to its output
 * function.  For NEIGH_LINK_TABLE: build the hard header directly and
 * queue the skb.  Returns a negative errno on failure (-EAFNOSUPPORT
 * for an out-of-range index).
 * NOTE(review): the lookup-miss branch and skb-free-on-error lines are
 * missing from this extraction — verify against full source.
 */
2987 int neigh_xmit(int index, struct net_device *dev,
2988 const void *addr, struct sk_buff *skb)
2990 int err = -EAFNOSUPPORT;
2991 if (likely(index < NEIGH_NR_TABLES)) {
2992 struct neigh_table *tbl;
2993 struct neighbour *neigh;
2995 tbl = neigh_tables[index];
/* ARP fast path: key is the 4-byte IPv4 address, avoids the
 * generic memcmp-based lookup
 */
2999 if (index == NEIGH_ARP_TABLE) {
3000 u32 key = *((u32 *)addr);
3002 neigh = __ipv4_neigh_lookup_noref(dev, key);
3004 neigh = __neigh_lookup_noref(tbl, addr, dev);
3007 neigh = __neigh_create(tbl, addr, dev, false);
3008 err = PTR_ERR(neigh);
3009 if (IS_ERR(neigh)) {
3010 rcu_read_unlock_bh();
3013 err = neigh->output(neigh, skb);
3014 rcu_read_unlock_bh();
3016 else if (index == NEIGH_LINK_TABLE) {
3017 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3018 addr, NULL, skb->len);
3021 err = dev_queue_xmit(skb);
3031 #ifdef CONFIG_PROC_FS
/* seq_file iterator: find the first neighbour visible to this netns,
 * honoring the NEIGH_SEQ_SKIP_NOARP filter and the optional
 * per-protocol sub-iterator.  Records the bucket reached in
 * state->bucket so neigh_get_next() can resume.
 * NOTE(review): loop-exit/goto lines are missing from this extraction.
 */
3033 static struct neighbour *neigh_get_first(struct seq_file *seq)
3035 struct neigh_seq_state *state = seq->private;
3036 struct net *net = seq_file_net(seq);
3037 struct neigh_hash_table *nht = state->nht;
3038 struct neighbour *n = NULL;
3041 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3042 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
3043 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
/* skip entries belonging to other network namespaces */
3046 if (!net_eq(dev_net(n->dev), net))
3048 if (state->neigh_sub_iter) {
3052 v = state->neigh_sub_iter(state, n, &fakep);
3056 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3058 if (n->nud_state & ~NUD_NOARP)
3061 n = rcu_dereference_bh(n->next);
3067 state->bucket = bucket;
/* seq_file iterator: advance past @n to the next matching neighbour,
 * applying the same netns / NOARP / sub-iterator filters as
 * neigh_get_first() and rolling over to the next hash bucket when the
 * current chain is exhausted.
 * NOTE(review): several control-flow lines (continue/goto/pos
 * handling) are missing from this extraction.
 */
3072 static struct neighbour *neigh_get_next(struct seq_file *seq,
3073 struct neighbour *n,
3076 struct neigh_seq_state *state = seq->private;
3077 struct net *net = seq_file_net(seq);
3078 struct neigh_hash_table *nht = state->nht;
3080 if (state->neigh_sub_iter) {
3081 void *v = state->neigh_sub_iter(state, n, pos);
3085 n = rcu_dereference_bh(n->next);
3089 if (!net_eq(dev_net(n->dev), net))
3091 if (state->neigh_sub_iter) {
3092 void *v = state->neigh_sub_iter(state, n, pos);
3097 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3100 if (n->nud_state & ~NUD_NOARP)
3103 n = rcu_dereference_bh(n->next);
/* chain exhausted: move to the next bucket, stop after the last one */
3109 if (++state->bucket >= (1 << nht->hash_shift))
3112 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* Position the neighbour iterator at offset *pos: start from the first
 * entry and step forward, decrementing *pos.  Returns NULL if the
 * offset lies beyond the last neighbour (remaining *pos != 0).
 */
3120 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3122 struct neighbour *n = neigh_get_first(seq);
3127 n = neigh_get_next(seq, n, pos);
3132 return *pos ? NULL : n;
/* seq_file iterator: first proxy-neighbour entry in this netns.  Sets
 * NEIGH_SEQ_IS_PNEIGH so neigh_seq_next() knows which table the
 * current cursor belongs to.  phash_buckets needs no RCU accessors:
 * the caller holds tbl->lock (taken in neigh_seq_start()).
 */
3135 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3137 struct neigh_seq_state *state = seq->private;
3138 struct net *net = seq_file_net(seq);
3139 struct neigh_table *tbl = state->tbl;
3140 struct pneigh_entry *pn = NULL;
3141 int bucket = state->bucket;
3143 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3144 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3145 pn = tbl->phash_buckets[bucket];
/* skip proxy entries from other network namespaces */
3146 while (pn && !net_eq(pneigh_net(pn), net))
3151 state->bucket = bucket;
/* seq_file iterator: advance past @pn to the next proxy-neighbour in
 * this netns, rolling over through the PNEIGH_HASHMASK+1 buckets.
 * NOTE(review): the step-to-pn->next and pos-decrement lines are
 * missing from this extraction — verify against full source.
 */
3156 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3157 struct pneigh_entry *pn,
3160 struct neigh_seq_state *state = seq->private;
3161 struct net *net = seq_file_net(seq);
3162 struct neigh_table *tbl = state->tbl;
3166 } while (pn && !net_eq(pneigh_net(pn), net));
3169 if (++state->bucket > PNEIGH_HASHMASK)
3171 pn = tbl->phash_buckets[state->bucket];
3172 while (pn && !net_eq(pneigh_net(pn), net))
/* Position the proxy-neighbour iterator at offset *pos; NULL when the
 * offset is past the end (mirrors neigh_get_idx()).
 */
3184 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3186 struct pneigh_entry *pn = pneigh_get_first(seq);
3191 pn = pneigh_get_next(seq, pn, pos);
3196 return *pos ? NULL : pn;
/* Unified positioning: try the regular neighbour table first, then —
 * unless NEIGH_SEQ_NEIGH_ONLY is set — continue into the proxy table
 * with the leftover offset.  A local copy of *pos is used so the two
 * lookups share one running offset.
 */
3199 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3201 struct neigh_seq_state *state = seq->private;
3203 loff_t idxpos = *pos;
3205 rc = neigh_get_idx(seq, &idxpos);
3206 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3207 rc = pneigh_get_idx(seq, &idxpos);
/* seq_file ->start: snapshot tbl->nht under RCU-bh and read-lock the
 * table for the duration of the walk (released in neigh_seq_stop()).
 * *pos == 0 yields SEQ_START_TOKEN so ->show can print a header.
 * Exported for per-protocol /proc implementations (e.g. arp).
 */
3212 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3213 __acquires(tbl->lock)
3216 struct neigh_seq_state *state = seq->private;
/* IS_PNEIGH is iterator-internal state; callers must not pass it in */
3220 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3223 state->nht = rcu_dereference_bh(tbl->nht);
3224 read_lock(&tbl->lock);
3226 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
/* seq_file ->next: step the cursor.  From SEQ_START_TOKEN go to the
 * first neighbour; from a neighbour, advance and fall through to the
 * proxy table when the regular table is exhausted; from a proxy entry,
 * keep walking proxies.  The BUG_ON guards the invariant that a pneigh
 * cursor can only exist when proxies were requested.
 * NOTE(review): the (*pos)++ and return lines are missing from this
 * extraction.
 */
3230 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3232 struct neigh_seq_state *state;
3235 if (v == SEQ_START_TOKEN) {
3236 rc = neigh_get_first(seq);
3240 state = seq->private;
3241 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3242 rc = neigh_get_next(seq, v, NULL);
3245 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3246 rc = pneigh_get_first(seq);
3248 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3249 rc = pneigh_get_next(seq, v, NULL);
/* seq_file ->stop: release the table read-lock and RCU-bh section
 * taken in neigh_seq_start().
 */
3257 void neigh_seq_stop(struct seq_file *seq, void *v)
3258 __releases(tbl->lock)
3261 struct neigh_seq_state *state = seq->private;
3262 struct neigh_table *tbl = state->tbl;
3264 read_unlock(&tbl->lock);
3265 rcu_read_unlock_bh();
3269 /* statistics via seq_file */
/* /proc per-table statistics: iterate the per-CPU neigh_statistics.
 * Position 0 is SEQ_START_TOKEN (column header); positions >= 1 map to
 * possible CPUs.  The table pointer comes from the proc entry's
 * private data (PDE_DATA).
 * NOTE(review): the *pos bookkeeping lines are missing from this
 * extraction.
 */
3271 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3273 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3277 return SEQ_START_TOKEN;
3279 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3280 if (!cpu_possible(cpu))
3283 return per_cpu_ptr(tbl->stats, cpu);
/* Advance to the next possible CPU's statistics block */
3288 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3290 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3293 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3294 if (!cpu_possible(cpu))
3297 return per_cpu_ptr(tbl->stats, cpu);
/* No resources to release: start() takes no locks */
3303 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* One row per CPU; the start token prints the column header */
3308 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3310 struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3311 struct neigh_statistics *st = v;
3313 if (v == SEQ_START_TOKEN) {
3314 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n")
3318 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3319 "%08lx %08lx %08lx %08lx %08lx %08lx\n",
3320 atomic_read(&tbl->entries),
3331 st->rcv_probes_mcast,
3332 st->rcv_probes_ucast,
3334 st->periodic_gc_runs,
3343 static const struct seq_operations neigh_stat_seq_ops = {
3344 .start = neigh_stat_seq_start,
3345 .next = neigh_stat_seq_next,
3346 .stop = neigh_stat_seq_stop,
3347 .show = neigh_stat_seq_show,
3349 #endif /* CONFIG_PROC_FS */
/* Multicast a netlink notification about neighbour @n to RTNLGRP_NEIGH
 * listeners.  GFP_ATOMIC because this can run from timer/softirq
 * context.  On fill failure the sk error is propagated to listeners
 * via rtnl_set_sk_err().
 * NOTE(review): the errout/kfree_skb lines are missing from this
 * extraction.
 */
3351 static void __neigh_notify(struct neighbour *n, int type, int flags,
3354 struct net *net = dev_net(n->dev);
3355 struct sk_buff *skb;
3358 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3362 err = neigh_fill_info(skb, n, pid, 0, type, flags);
3364 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3365 WARN_ON(err == -EMSGSIZE);
3369 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3373 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace to resolve this neighbour (app_probes): sent as an
 * RTM_GETNEIGH request, not a state notification.
 */
3376 void neigh_app_ns(struct neighbour *n)
3378 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3382 #ifdef CONFIG_SYSCTL
/* Cap so that qlen * SKB_TRUESIZE(ETH_FRAME_LEN) cannot overflow int */
3383 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* sysctl handler for the legacy packet-count "unres_qlen": the backing
 * storage is byte-based (QUEUE_LEN_BYTES), so convert bytes -> packets
 * for reads and packets -> bytes for writes, using a temporary
 * ctl_table so the real one isn't mutated.
 * NOTE(review): write/rounding branches are missing from this
 * extraction — verify conversion direction against full source.
 */
3385 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3386 void __user *buffer, size_t *lenp, loff_t *ppos)
3389 struct ctl_table tmp = *ctl;
3391 tmp.extra1 = SYSCTL_ZERO;
3392 tmp.extra2 = &unres_qlen_max;
3395 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3396 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3399 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Fetch a device's per-family neigh_parms under RCU (ARP for IPv4,
 * ND for IPv6).  NOTE(review): the family switch/return lines are
 * missing from this extraction.
 */
3403 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3408 return __in_dev_arp_parms_get_rcu(dev);
3410 return __in6_dev_nd_parms_get_rcu(dev);
/* Propagate a changed default parm to every device in the netns that
 * has not locally overridden it (data_state bit unset = still tracking
 * the default).
 */
3415 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3418 struct net_device *dev;
3419 int family = neigh_parms_family(p);
3422 for_each_netdev_rcu(net, dev) {
3423 struct neigh_parms *dst_p =
3424 neigh_get_dev_parms_rcu(dev, family);
3426 if (dst_p && !test_bit(index, dst_p->data_state))
3427 dst_p->data[index] = p->data[index];
/* Common post-write hook for neighbour sysctls: mark the parm as
 * locally set, fire the netevent for delay_probe_time changes, and —
 * when this is the "default" table (dev == NULL) — copy the new value
 * to all devices still tracking the default.  The parm index is
 * recovered from the ctl->data pointer's offset within p->data.
 */
3432 static void neigh_proc_update(struct ctl_table *ctl, int write)
3434 struct net_device *dev = ctl->extra1;
3435 struct neigh_parms *p = ctl->extra2;
3436 struct net *net = neigh_parms_net(p);
3437 int index = (int *) ctl->data - p->data;
3442 set_bit(index, p->data_state);
3443 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3444 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3445 if (!dev) /* NULL dev means this is default value */
3446 neigh_copy_dflt_parms(net, p, index);
/* Family of thin proc_handler wrappers: each delegates to the matching
 * generic proc_dointvec* helper and then calls neigh_proc_update() so
 * writes are propagated (see neigh_proc_update above for what that
 * entails).
 */
/* Bounded variant: clamp writes to [0, INT_MAX] via a temp ctl_table */
3449 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3450 void __user *buffer,
3451 size_t *lenp, loff_t *ppos)
3453 struct ctl_table tmp = *ctl;
3456 tmp.extra1 = SYSCTL_ZERO;
3457 tmp.extra2 = SYSCTL_INT_MAX;
3459 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3460 neigh_proc_update(ctl, write);
/* Plain integer parm */
3464 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3465 void __user *buffer, size_t *lenp, loff_t *ppos)
3467 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3469 neigh_proc_update(ctl, write);
/* Parm stored in jiffies, exposed in seconds */
3474 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3475 void __user *buffer,
3476 size_t *lenp, loff_t *ppos)
3478 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3480 neigh_proc_update(ctl, write);
/* Parm stored in jiffies, exposed in USER_HZ clock ticks */
3485 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3486 void __user *buffer,
3487 size_t *lenp, loff_t *ppos)
3489 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3491 neigh_proc_update(ctl, write);
/* Parm stored in jiffies, exposed in milliseconds */
3495 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3496 void __user *buffer,
3497 size_t *lenp, loff_t *ppos)
3499 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3501 neigh_proc_update(ctl, write);
/* Legacy packet-count view of the byte-based unresolved queue limit */
3506 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3507 void __user *buffer,
3508 size_t *lenp, loff_t *ppos)
3510 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3512 neigh_proc_update(ctl, write);
/* Handler for base_reachable_time / base_reachable_time_ms: dispatch
 * on the procname to the seconds or milliseconds variant, then
 * immediately re-randomize p->reachable_time so the new base takes
 * effect without waiting for neigh_periodic_work.
 * NOTE(review): the assignment target of neigh_rand_reach_time() and
 * the fallthrough error path are missing from this extraction.
 */
3516 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3517 void __user *buffer,
3518 size_t *lenp, loff_t *ppos)
3520 struct neigh_parms *p = ctl->extra2;
3523 if (strcmp(ctl->procname, "base_reachable_time") == 0)
3524 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3525 else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3526 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3530 if (write && ret == 0) {
3531 /* update reachable_time as well, otherwise, the change will
3532 * only be effective after the next time neigh_periodic_work
3533 * decides to recompute it
3536 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
/* Offset-of trick: yields the byte offset of data[index] within
 * struct neigh_parms; neigh_sysctl_register() later rebases it onto a
 * real parms instance by adding the instance pointer.
 */
3541 #define NEIGH_PARMS_DATA_OFFSET(index) \
3542 (&((struct neigh_parms *) 0)->data[index])
/* Template ctl_table entry: attr picks the slot in neigh_vars[],
 * data_attr picks which parm backs it (they differ for the "reused"
 * legacy aliases below).
 */
3544 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3545 [NEIGH_VAR_ ## attr] = { \
3547 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3548 .maxlen = sizeof(int), \
3550 .proc_handler = proc, \
/* Convenience wrappers binding each value class to its handler */
3553 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3554 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3556 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3557 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3559 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3560 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3562 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3563 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
/* "REUSED" variants expose a second sysctl name over another attr's
 * storage (e.g. retrans_time_ms over RETRANS_TIME).
 */
3565 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3566 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3568 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3569 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
/* Template sysctl table, kmemdup'd per registration in
 * neigh_sysctl_register().  Per-parms entries come first (built via
 * the NEIGH_SYSCTL_* macros); the trailing gc_* entries are
 * table-global and only kept for the "default" registration.
 */
3571 static struct neigh_sysctl_table {
3572 struct ctl_table_header *sysctl_header;
3573 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3574 } neigh_sysctl_template __read_mostly = {
3576 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3577 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3578 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3579 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3580 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3581 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3582 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3583 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3584 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3585 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3586 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3587 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3588 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
/* Legacy aliases sharing storage with the entries above */
3589 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3590 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3591 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
/* Table-global GC knobs; .data is patched in at registration time */
3592 [NEIGH_VAR_GC_INTERVAL] = {
3593 .procname = "gc_interval",
3594 .maxlen = sizeof(int),
3596 .proc_handler = proc_dointvec_jiffies,
3598 [NEIGH_VAR_GC_THRESH1] = {
3599 .procname = "gc_thresh1",
3600 .maxlen = sizeof(int),
3602 .extra1 = SYSCTL_ZERO,
3603 .extra2 = SYSCTL_INT_MAX,
3604 .proc_handler = proc_dointvec_minmax,
3606 [NEIGH_VAR_GC_THRESH2] = {
3607 .procname = "gc_thresh2",
3608 .maxlen = sizeof(int),
3610 .extra1 = SYSCTL_ZERO,
3611 .extra2 = SYSCTL_INT_MAX,
3612 .proc_handler = proc_dointvec_minmax,
3614 [NEIGH_VAR_GC_THRESH3] = {
3615 .procname = "gc_thresh3",
3616 .maxlen = sizeof(int),
3618 .extra1 = SYSCTL_ZERO,
3619 .extra2 = SYSCTL_INT_MAX,
3620 .proc_handler = proc_dointvec_minmax,
/* Register the per-device (or per-table-default) neighbour sysctl
 * tree under net/<family>/neigh/<dev|default>/.  Duplicates the
 * template, rebases the per-parms .data offsets onto @p, wires
 * extra1/extra2 for neigh_proc_update(), and installs custom handlers
 * for the time-valued entries.  Returns 0 on success.
 * NOTE(review): error-unwind lines (kfree on registration failure) are
 * missing from this extraction — verify against full source.
 */
3626 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3627 proc_handler *handler)
3630 struct neigh_sysctl_table *t;
3631 const char *dev_name_source;
3632 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3635 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
/* Rebase template offsets (NEIGH_PARMS_DATA_OFFSET) onto this parms
 * instance and stash dev/p for neigh_proc_update()
 */
3639 for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3640 t->neigh_vars[i].data += (long) p;
3641 t->neigh_vars[i].extra1 = dev;
3642 t->neigh_vars[i].extra2 = p;
3646 dev_name_source = dev->name;
3647 /* Terminate the table early */
/* Per-device registration: drop the table-global gc_* entries */
3648 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3649 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
/* Default registration: point the gc_* entries at the table fields */
3651 struct neigh_table *tbl = p->tbl;
3652 dev_name_source = "default";
3653 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3654 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3655 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3656 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
/* Caller-supplied handler overrides for the time-valued entries */
3661 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3663 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3664 /* RetransTime (in milliseconds)*/
3665 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3666 /* ReachableTime (in milliseconds) */
3667 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3669 /* Those handlers will update p->reachable_time after
3670 * base_reachable_time(_ms) is set to ensure the new timer starts being
3671 * applied after the next neighbour update instead of waiting for
3672 * neigh_periodic_work to update its value (can be multiple minutes)
3673 * So any handler that replaces them should do this as well
3676 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3677 neigh_proc_base_reachable_time;
3678 /* ReachableTime (in milliseconds) */
3679 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3680 neigh_proc_base_reachable_time;
3683 /* Don't export sysctls to unprivileged users */
3684 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3685 t->neigh_vars[0].procname = NULL;
/* Pick the "ipv4"/"ipv6" path component from the parms family
 * (switch cases missing from this extraction)
 */
3687 switch (neigh_parms_family(p)) {
3698 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3699 p_name, dev_name_source);
3701 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3702 if (!t->sysctl_header)
3705 p->sysctl_table = t;
/* Tear down what neigh_sysctl_register() installed: clear the pointer
 * before unregistering, then free the duplicated table.
 * NOTE(review): the kfree(t) line is missing from this extraction —
 * presumably it follows the unregister; verify against full source.
 */
3715 void neigh_sysctl_unregister(struct neigh_parms *p)
3717 if (p->sysctl_table) {
3718 struct neigh_sysctl_table *t = p->sysctl_table;
3719 p->sysctl_table = NULL;
3720 unregister_net_sysctl_table(t->sysctl_header);
3726 #endif /* CONFIG_SYSCTL */
/* Subsystem init: register the PF_UNSPEC rtnetlink handlers for
 * neighbour add/delete/get(+dump) and the neigh-table dump/set
 * messages.
 */
3728 static int __init neigh_init(void)
3730 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3731 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3732 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3734 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3736 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3741 subsys_initcall(neigh_init);