GNU Linux-libre 4.14.303-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
46 #define DEBUG
47 #define NEIGH_DEBUG 1
48 #define neigh_dbg(level, fmt, ...)              \
49 do {                                            \
50         if (level <= NEIGH_DEBUG)               \
51                 pr_debug(fmt, ##__VA_ARGS__);   \
52 } while (0)
53
54 #define PNEIGH_HASHMASK         0xF
55
56 static void neigh_timer_handler(unsigned long arg);
57 static void __neigh_notify(struct neighbour *n, int type, int flags,
58                            u32 pid);
59 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
60 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
61                                     struct net_device *dev);
62
63 #ifdef CONFIG_PROC_FS
64 static const struct file_operations neigh_stat_seq_fops;
65 #endif
66
67 /*
68    Neighbour hash table buckets are protected with rwlock tbl->lock.
69
70    - All the scans/updates to hash buckets MUST be made under this lock.
71    - NOTHING clever should be made under this lock: no callbacks
72      to protocol backends, no attempts to send something to network.
73      It will result in deadlocks, if backend/driver wants to use neighbour
74      cache.
75    - If the entry requires some non-trivial actions, increase
76      its reference count and release table lock.
77
78    Neighbour entries are protected:
79    - with reference count.
80    - with rwlock neigh->lock
81
82    Reference count prevents destruction.
83
84    neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
86     - timer
87     - resolution queue
88
89    Again, nothing clever shall be made under neigh->lock,
90    the most complicated procedure, which we allow is dev->hard_header.
91    It is supposed, that dev->hard_header is simplistic and does
92    not make callbacks to neighbour tables.
93  */
94
/* Fallback output handler installed on dead/unusable neighbour entries:
 * drop the packet and report the network as down to the caller.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
100
/* Final teardown for an entry that has been unlinked from the table:
 * run the optional per-parms cleanup hook, broadcast RTM_DELNEIGH and a
 * netevent to interested listeners, then drop the table's reference.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
110
111 /*
112  * It is random distribution in the interval (1/2)*base...(3/2)*base.
113  * It corresponds to default IPv6 settings and is not overridable,
114  * because it is really reasonable choice.
115  */
116
/* Draw a random reachability interval uniformly from
 * [base/2, 3*base/2); a zero base yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half;

	if (!base)
		return 0;

	half = base >> 1;
	return half + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
/* Unlink @n from the bucket position @np if it is safe to do so: only
 * the hash table holds a reference (refcnt == 1) and its NUD state does
 * not intersect the @state mask.  Caller must hold tbl->lock.  On
 * success the entry is marked dead and released.  Returns true when the
 * entry was actually removed.
 */
static bool neigh_del(struct neighbour *n, __u8 state,
		      struct neighbour __rcu **np, struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	/* refcnt == 1: the table's own reference is the only one left. */
	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		n->dead = 1;
		retval = true;
	}
	write_unlock(&n->lock);
	/* Notify and drop the table reference outside n->lock. */
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
144
/* Find @ndel in its hash bucket and delete it if possible.  Caller must
 * hold tbl->lock.  Returns true when the entry was found and unlinked
 * (subject to the refcount check in neigh_del()), false otherwise.
 */
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	/* Recompute the bucket from the entry's own key and device. */
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			/* state mask 0: only the refcount gate applies. */
			return neigh_del(n, 0, np, tbl);
		np = &n->next;
	}
	return false;
}
167
/* Synchronous garbage collection, invoked from neigh_alloc() when the
 * table is over its gc thresholds.  Sweeps every bucket and removes
 * entries that are unreferenced and not NUD_PERMANENT.  Returns 1 if at
 * least one entry was freed, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
				shrunk = 1;
				/* *np already points at the successor. */
				continue;
			}
			np = &n->next;
		}
	}

	/* Record the sweep time so neigh_alloc() can rate-limit GC. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
204
/* Take a reference on @n and arm its timer for @when.  mod_timer()
 * returning nonzero means the timer was already pending - a
 * reference-accounting bug for this helper's contract, hence the loud
 * diagnostic and stack dump.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
214
215 static int neigh_del_timer(struct neighbour *n)
216 {
217         if ((n->nud_state & NUD_IN_TIMER) &&
218             del_timer(&n->timer)) {
219                 neigh_release(n);
220                 return 1;
221         }
222         return 0;
223 }
224
/* Drop every skb on the proxy queue @list belonging to @net (or all
 * skbs when @net is NULL).  Matching skbs are first moved to a private
 * list under the IRQ-safe queue lock; dev_put() and kfree_skb() then
 * run outside the lock.
 */
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		/* Grab the successor before unlinking the current skb. */
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		if (net == NULL || net_eq(dev_net(skb->dev), net)) {
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	/* Free the collected skbs and drop their device references. */
	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
249
/* Unlink every entry bound to @dev from the hash table (every entry
 * when @dev is NULL).  Caller must hold tbl->lock.  Entries still
 * referenced elsewhere are neutralized in place: arp_queue purged,
 * output pointed at neigh_blackhole, state downgraded - actual
 * destruction then happens when the last user drops its reference.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink first so lockless readers stop finding it. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
299
/* Flush all cache entries for @dev under the table write lock, e.g.
 * when the device's address information changes.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
307
/* Device teardown: flush the device's cache entries and proxy entries,
 * purge queued proxy packets belonging to its netns, and stop the proxy
 * timer once the queue is empty.  Always returns 0.
 * Note: pneigh_ifdown_and_unlock() releases the tbl->lock taken here.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
319
/* Allocate and minimally initialize a new neighbour entry for @tbl on
 * @dev.  May trigger a forced GC pass when the table is above its
 * thresholds; returns NULL when the table is full or the allocation
 * fails.  The entry starts with dead = 1 and only becomes live once
 * __neigh_create() links it into the hash table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Over gc_thresh3, or over gc_thresh2 with no flush in the last
	 * 5 seconds: try to reclaim entries before allocating.
	 */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* Room for driver-private data is reserved after the entry. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic entry-count bump taken above. */
	atomic_dec(&tbl->entries);
	goto out;
}
364
/* Generate one random hash secret; the low bit is forced on so the
 * value is never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}
369
/* Allocate a neighbour hash table with 2^shift buckets.  Bucket arrays
 * up to PAGE_SIZE come from kzalloc; larger ones from the page
 * allocator, with an explicit kmemleak annotation.  Fresh random hash
 * secrets are generated for the new table.  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
398
/* RCU callback freeing a retired hash table after the grace period.
 * The freeing strategy must mirror the size-based allocation in
 * neigh_hash_alloc().
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}
415
/* Rehash every entry into a freshly allocated table of 2^new_shift
 * buckets and publish it as tbl->nht.  Caller must hold tbl->lock.
 * On allocation failure the old table is returned unchanged; otherwise
 * the old table is freed after an RCU grace period.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the new table's random secrets. */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push the entry onto the new bucket's head. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
456
/* Look up an entry by (pkey, dev) and take a reference on it.
 * Lockless: walks the RCU-protected buckets under rcu_read_lock_bh().
 * Returns NULL when not found or when the entry's refcount already hit
 * zero (it is being torn down).
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		/* A dying entry (refcnt 0) must not be handed out. */
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
476
/* Look up an entry by key alone: the device is ignored (hashed with
 * dev == NULL) but the netns must match.  Takes a reference on success;
 * returns NULL if absent or if the entry's refcount already hit zero.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
507
/* Create the entry for (pkey, dev) and insert it into the hash table,
 * growing the table when the entry count exceeds the bucket count.  If
 * a concurrent creator inserted the same key meanwhile, that entry is
 * returned instead and the new one is released.  Returns an ERR_PTR on
 * allocation/constructor failure or when n->parms is already dead.
 * Takes an extra reference for the caller when @want_ref is true.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation by 2 * BASE_REACHABLE_TIME. */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Recheck the chain: the allocation and setup above ran without
	 * tbl->lock, so a concurrent creator may have won the race.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the bucket head via rcu_assign_pointer so lockless
	 * readers observe a fully initialized entry.
	 */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
595
596 static u32 pneigh_hash(const void *pkey, int key_len)
597 {
598         u32 hash_val = *(u32 *)(pkey + key_len - 4);
599         hash_val ^= (hash_val >> 16);
600         hash_val ^= hash_val >> 8;
601         hash_val ^= hash_val >> 4;
602         hash_val &= PNEIGH_HASHMASK;
603         return hash_val;
604 }
605
606 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
607                                               struct net *net,
608                                               const void *pkey,
609                                               int key_len,
610                                               struct net_device *dev)
611 {
612         while (n) {
613                 if (!memcmp(n->key, pkey, key_len) &&
614                     net_eq(pneigh_net(n), net) &&
615                     (n->dev == dev || !n->dev))
616                         return n;
617                 n = n->next;
618         }
619         return NULL;
620 }
621
/* Proxy-entry lookup without taking any lock here.
 * NOTE(review): callers presumably hold tbl->lock or RTNL to keep the
 * proxy hash stable - verify at the call sites.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
632
/* Find the proxy entry for (net, pkey, dev).  When no entry exists and
 * @creat is nonzero, allocate and insert a new one (the creation path
 * asserts RTNL).  Returns the entry, or NULL on lookup miss, allocation
 * failure, or pconstructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* Creation is serialized by RTNL. */
	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the device hold and back out. */
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
677
678
/* Remove and free the proxy entry exactly matching (net, pkey, dev).
 * The destructor, dev_put() and kfree() run after tbl->lock is dropped.
 * Returns 0 on success, -ENOENT when no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
704
/* Unlink every proxy entry bound to @dev (all entries when @dev is
 * NULL) and release tbl->lock, which the caller must hold on entry.
 * Unlinked entries are collected on a private free list so the
 * destructors, dev_put() and kfree() run without the lock held.
 * Always returns -ENOENT.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
735
736 static void neigh_parms_destroy(struct neigh_parms *parms);
737
/* Drop one reference on a parms block; destroy it on the final put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
743
744 /*
745  *      neighbour must already be out of the table;
746  *
747  */
/* Final destruction once the last reference is gone.  The entry must
 * already be unlinked and marked dead; otherwise we complain and leak
 * it rather than corrupt the table.  Purges the arp_queue, drops the
 * device and parms references, and frees the entry after an RCU grace
 * period.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A dead entry must not still have a pending timer. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
780
781 /* Neighbour state is suspicious;
782    disable fast path.
783
784    Called with write_locked neigh.
785  */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* Route all transmits through ops->output (the slow path). */
	neigh->output = neigh->ops->output;
}
792
793 /* Neighbour state is OK;
794    enable fast path.
795
796    Called with write_locked neigh.
797  */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* Switch transmits to the connected (fast path) handler. */
	neigh->output = neigh->ops->connected_output;
}
804
/* Delayed-work GC pass over the whole table.  Re-randomizes every
 * parms' reachable_time at most once per 300 seconds, then (when the
 * table holds at least gc_thresh1 entries) walks all buckets and
 * removes unreferenced entries that are NUD_FAILED or idle for longer
 * than GC_STALETIME.  Re-arms itself at BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Permanent entries and entries with a running
			 * state-machine timer are never reclaimed here.
			 */
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			/* Only the table's own reference left and the
			 * entry is failed or stale: evict it.
			 */
			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
885
886 static __inline__ int neigh_max_probes(struct neighbour *n)
887 {
888         struct neigh_parms *p = n->parms;
889         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
890                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
891                 NEIGH_VAR(p, MCAST_PROBES));
892 }
893
/* Fail resolution for @neigh: account the failure, report every queued
 * skb back to its owner as unreachable, then flush the arp_queue.
 *
 * Entered and exited with neigh->lock write-held; the lock is dropped
 * around each ->error_report() call, which may re-enter this entry.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Anything left (state changed while the lock was dropped) is
	 * silently discarded.
	 */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
918
919 static void neigh_probe(struct neighbour *neigh)
920         __releases(neigh->lock)
921 {
922         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
923         /* keep skb alive even if arp_queue overflows */
924         if (skb)
925                 skb = skb_clone(skb, GFP_ATOMIC);
926         write_unlock(&neigh->lock);
927         if (neigh->ops->solicit)
928                 neigh->ops->solicit(neigh, skb);
929         atomic_inc(&neigh->probes);
930         kfree_skb(skb);
931 }
932
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: NUD_REACHABLE decays to NUD_DELAY or
 * NUD_STALE, NUD_DELAY is either re-confirmed to NUD_REACHABLE or
 * escalated to NUD_PROBE, and NUD_PROBE/NUD_INCOMPLETE retransmit
 * solicitations until neigh_max_probes() is exhausted, after which the
 * entry goes to NUD_FAILED and its queued packets are invalidated.
 */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* The timer raced with a state change that left it inactive. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm sooner than HZ/2 to bound timer frequency. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* releases neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);
}
1020
/* Kick off address resolution for @neigh if needed, optionally queueing
 * @skb until the link-layer address is known.
 *
 * Returns 0 when the caller may transmit immediately, 1 when the skb
 * was queued or dropped because resolution failed.  Takes and releases
 * neigh->lock with BHs disabled; when an immediate probe is required,
 * neigh_probe() releases the lock itself (see out_unlock_bh below).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probe mechanism configured: fail straight away. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Evict oldest queued skbs to stay under the
			 * per-neighbour byte quota.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself; split the unlock from
	 * local_bh_enable() so both paths balance the write_lock_bh above.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1100
1101 static void neigh_update_hhs(struct neighbour *neigh)
1102 {
1103         struct hh_cache *hh;
1104         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1105                 = NULL;
1106
1107         if (neigh->dev->header_ops)
1108                 update = neigh->dev->header_ops->cache_update;
1109
1110         if (update) {
1111                 hh = &neigh->hh;
1112                 if (READ_ONCE(hh->hh_len)) {
1113                         write_seqlock_bh(&hh->hh_lock);
1114                         update(hh, neigh->dev, neigh->ha);
1115                         write_sequnlock_bh(&hh->hh_lock);
1116                 }
1117         }
1118 }
1119
1120
1121
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if one is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if the new one differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
				lladdr as suspect instead of
				overriding it if the new one differs.
	NEIGH_UPDATE_F_ADMIN	means the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates the neighbour is known to be
				a router.

   The caller MUST hold a reference count on the entry.
 */
1140
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/* Transition to an invalid state: tear down, no lladdr handling. */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Back-date confirmation so an unconfirmed address gets
		 * revalidated promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1313
1314 /* Update the neigh to listen temporarily for probe responses, even if it is
1315  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1316  */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	/* Only a FAILED entry is re-armed; others are left untouched. */
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	/* Start at the probe ceiling so only this single solicitation is
	 * issued before the timer gives up again.
	 */
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
1330
1331 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1332                                  u8 *lladdr, void *saddr,
1333                                  struct net_device *dev)
1334 {
1335         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1336                                                  lladdr || !dev->addr_len);
1337         if (neigh)
1338                 neigh_update(neigh, lladdr, NUD_STALE,
1339                              NEIGH_UPDATE_F_OVERRIDE, 0);
1340         return neigh;
1341 }
1342 EXPORT_SYMBOL(neigh_event_ns);
1343
/* Takes write_lock_bh(&n->lock) itself; callers need no lock held. */
1345 static void neigh_hh_init(struct neighbour *n)
1346 {
1347         struct net_device *dev = n->dev;
1348         __be16 prot = n->tbl->protocol;
1349         struct hh_cache *hh = &n->hh;
1350
1351         write_lock_bh(&n->lock);
1352
1353         /* Only one thread can come in here and initialize the
1354          * hh_cache entry.
1355          */
1356         if (!hh->hh_len)
1357                 dev->header_ops->cache(n, hh, prot);
1358
1359         write_unlock_bh(&n->lock);
1360 }
1361
1362 /* Slow and careful. */
1363
/* Resolve @neigh if necessary, then build the hard header and transmit.
 *
 * Returns 0 when the skb was queued awaiting resolution, -EINVAL if
 * header construction failed (skb freed), or dev_queue_xmit()'s result.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		/* Retry the header if the lladdr changed mid-copy. */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1396
1397 /* As fast as possible without hh cache */
1398
1399 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1400 {
1401         struct net_device *dev = neigh->dev;
1402         unsigned int seq;
1403         int err;
1404
1405         do {
1406                 __skb_pull(skb, skb_network_offset(skb));
1407                 seq = read_seqbegin(&neigh->ha_lock);
1408                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1409                                       neigh->ha, NULL, skb->len);
1410         } while (read_seqretry(&neigh->ha_lock, seq));
1411
1412         if (err >= 0)
1413                 err = dev_queue_xmit(skb);
1414         else {
1415                 err = -EINVAL;
1416                 kfree_skb(skb);
1417         }
1418         return err;
1419 }
1420 EXPORT_SYMBOL(neigh_connected_output);
1421
/* Output helper for devices that need neither resolution nor a hard
 * header: pass the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1427
/* Proxy-queue timer handler: replay (via tbl->proxy_redo) every queued
 * skb whose scheduled time has passed, then re-arm the timer for the
 * earliest remaining entry, if any.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1461
/* Queue @skb for delayed proxy handling, a random 0..PROXY_DELAY ticks
 * from now, and (re)arm the table's proxy timer accordingly.  The skb
 * is dropped when the queue already exceeds PROXY_QLEN entries.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	/* NOTE(review): prandom_u32() % PROXY_DELAY divides by zero if the
	 * proxy_delay sysctl is set to 0 -- confirm callers exclude that.
	 */
	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* Never push the timer past an already-pending earlier expiry. */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);	/* released in neigh_proxy_process() */
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1490
1491 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1492                                                       struct net *net, int ifindex)
1493 {
1494         struct neigh_parms *p;
1495
1496         list_for_each_entry(p, &tbl->parms_list, list) {
1497                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1498                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1499                         return p;
1500         }
1501
1502         return NULL;
1503 }
1504
1505 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1506                                       struct neigh_table *tbl)
1507 {
1508         struct neigh_parms *p;
1509         struct net *net = dev_net(dev);
1510         const struct net_device_ops *ops = dev->netdev_ops;
1511
1512         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1513         if (p) {
1514                 p->tbl            = tbl;
1515                 refcount_set(&p->refcnt, 1);
1516                 p->reachable_time =
1517                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1518                 dev_hold(dev);
1519                 p->dev = dev;
1520                 write_pnet(&p->net, net);
1521                 p->sysctl_table = NULL;
1522
1523                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1524                         dev_put(dev);
1525                         kfree(p);
1526                         return NULL;
1527                 }
1528
1529                 write_lock_bh(&tbl->lock);
1530                 list_add(&p->list, &tbl->parms.list);
1531                 write_unlock_bh(&tbl->lock);
1532
1533                 neigh_parms_data_state_cleanall(p);
1534         }
1535         return p;
1536 }
1537 EXPORT_SYMBOL(neigh_parms_alloc);
1538
1539 static void neigh_rcu_free_parms(struct rcu_head *head)
1540 {
1541         struct neigh_parms *parms =
1542                 container_of(head, struct neigh_parms, rcu_head);
1543
1544         neigh_parms_put(parms);
1545 }
1546
/* Unlink @parms from @tbl and release the list's reference after an
 * RCU grace period.  The table's built-in default parms (&tbl->parms)
 * are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1560
/* Final free for a parms block whose refcount dropped to zero. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1565
1566 static struct lock_class_key neigh_table_proxy_queue_class;
1567
1568 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1569
/* One-time initialization of @tbl at slot @index: default parms,
 * per-CPU statistics, /proc entry, neighbour and proxy hash tables,
 * the periodic GC work and the proxy timer.  Panics on allocation
 * failure -- this runs only at boot or module init.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1620
/* Tear down @tbl: stop GC work and proxy processing, flush every entry,
 * then free the hash tables, /proc entry and statistics.  Always
 * returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1647
1648 static struct neigh_table *neigh_find_table(int family)
1649 {
1650         struct neigh_table *tbl = NULL;
1651
1652         switch (family) {
1653         case AF_INET:
1654                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1655                 break;
1656         case AF_INET6:
1657                 tbl = neigh_tables[NEIGH_ND_TABLE];
1658                 break;
1659         case AF_DECnet:
1660                 tbl = neigh_tables[NEIGH_DN_TABLE];
1661                 break;
1662         }
1663
1664         return tbl;
1665 }
1666
/* RTM_DELNEIGH handler: resolve the target table and device, then
 * either delete a proxy entry (NTF_PROXY) or force the neighbour to
 * NUD_FAILED and unlink it from the hash table.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	write_lock_bh(&tbl->lock);
	/* Drop the lookup reference first; the hash table still holds
	 * its own reference, which neigh_remove_one() handles on unlink.
	 */
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1728
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry.
 *
 * Honours the usual netlink create/replace semantics: NLM_F_CREATE is
 * required to add a new entry, NLM_F_EXCL rejects an existing one, and
 * without NLM_F_REPLACE an existing entry's lladdr is not overridden.
 * NTF_PROXY requests go to the proxy hash; NTF_USE only triggers
 * resolution (neigh_event_send) without changing the entry.
 * Runs under the RTNL lock.  Returns 0 or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be at least as long as
		 * the device's hardware address.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		/* pneigh_lookup(..., 1) creates the entry if missing. */
		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE, do not clobber an existing lladdr. */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		/* Just kick resolution; do not modify the entry itself. */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1821
/* Emit one NDTA_PARMS nested attribute describing @parms.
 *
 * Returns the (positive) skb length via nla_nest_end() on success,
 * -ENOBUFS if the nest could not be started, or -EMSGSIZE if any
 * attribute did not fit (the partial nest is cancelled).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* One short-circuiting chain: the first nla_put that fails aborts
	 * the whole nest.  NDTPA_IFINDEX is only emitted for per-device
	 * parms (parms->dev set), never for the table default.
	 */
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1869
/* Build one RTM_NEWNEIGHTBL message describing table @tbl: name, GC
 * thresholds/interval, an NDTA_CONFIG snapshot, aggregated NDTA_STATS,
 * and the table's default parms as NDTA_PARMS.
 *
 * Holds tbl->lock (read, BH-off) for the duration so the values are a
 * consistent snapshot.  Returns 0 or -EMSGSIZE (message cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	/* NDTA_CONFIG: static configuration plus hash-table geometry,
	 * the latter read under RCU since the hash can be resized.
	 */
	{
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	/* NDTA_STATS: sum the per-cpu counters into one struct. */
	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table's default parms must never be device-bound. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1958
/* Build a minimal RTM_NEWNEIGHTBL message carrying only the table name
 * and one (typically per-device) parms set as NDTA_PARMS.  Used when
 * dumping the non-default parms of a table.
 *
 * Returns 0 or -EMSGSIZE (message cancelled).
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1991
/* Validation policy for table-level RTM_SETNEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2000
/* Validation policy for the attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
2017
/* RTM_SETNEIGHTBL handler: update a neighbour table's parameters.
 *
 * The table is selected by NDTA_NAME (optionally constrained by the
 * ndtmsg family).  NDTA_PARMS updates one parms set, chosen by the
 * nested NDTPA_IFINDEX (0 = the table default).  The GC thresholds and
 * interval may only be changed from the initial network namespace.
 * Returns 0 or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Locate the table by name (and family, if one was given). */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply each attribute that was supplied. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count: convert to bytes. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* Listeners (e.g. switchdev drivers) track this value. */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide GC knobs: only allowed from the initial netns. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2178
/* RTM_GETNEIGHTBL dump: for each table (optionally filtered by family),
 * emit the table info followed by one message per non-default parms set
 * in the requesting netns.
 *
 * Resume state across dump calls lives in cb->args[0] (table index) and
 * cb->args[1] (parms index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Skip the default parms (head of parms_list) - it was
		 * already emitted inside neightbl_fill_info() above.
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2231
/* Build one RTM_NEWNEIGH message for neighbour @neigh: ndmsg header,
 * NDA_DST, NDA_LLADDR (only for NUD_VALID entries), NDA_PROBES and
 * NDA_CACHEINFO timing/refcount data.
 *
 * Returns 0 or -EMSGSIZE (message cancelled).
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* Take neigh->lock so state, lladdr and timestamps are a
	 * consistent snapshot.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Exclude the reference held by this dump itself. */
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2284
/* Build one RTM_NEWNEIGH message for proxy entry @pn.  Proxy entries
 * have no neighbour state, so NTF_PROXY is forced into ndm_flags and
 * the state is reported as NUD_NONE; only NDA_DST is attached.
 *
 * Returns 0 or -EMSGSIZE (message cancelled).
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2315
/* Announce a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2321
2322 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2323 {
2324         struct net_device *master;
2325
2326         if (!master_idx)
2327                 return false;
2328
2329         master = netdev_master_upper_dev_get(dev);
2330         if (!master || master->ifindex != master_idx)
2331                 return true;
2332
2333         return false;
2334 }
2335
2336 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2337 {
2338         if (filter_idx && dev->ifindex != filter_idx)
2339                 return true;
2340
2341         return false;
2342 }
2343
/* Dump all neighbours of @tbl visible in the requesting netns,
 * optionally filtered by NDA_IFINDEX and/or NDA_MASTER from the
 * request.  The hash table is walked under rcu_read_lock_bh().
 *
 * Resume cursors: cb->args[1] = hash bucket, cb->args[2] = index
 * within that bucket.  Returns skb->len, or -1 when the skb filled
 * (the dump will be resumed), or -EINVAL on malformed filters.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* Parse optional dump filters; a parse failure simply means
	 * no filtering (err != 0 branch is skipped).
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Save the position so a subsequent call resumes here. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2406
/* Dump all proxy entries of @tbl belonging to the requesting netns,
 * walking the proxy hash under tbl->lock (read, BH-off).
 *
 * Resume cursors: cb->args[3] = hash bucket, cb->args[4] = index
 * within that bucket.  Returns skb->len, or -1 when the skb filled.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	/* Save the position so a subsequent call resumes here. */
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2444
/* RTM_GETNEIGH dump handler: walk every neighbour table matching the
 * requested family and dump either its neighbour entries or, when
 * NTF_PROXY was set in the request, its proxy entries.  cb->args[0]
 * records the table index to resume at on the next dump call.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;
        int proxy = 0;
        int err;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        /* check for full ndmsg structure presence, family member is
         * the same for both structures
         */
        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
                proxy = 1;

        s_t = cb->args[0];

        for (t = 0; t < NEIGH_NR_TABLES; t++) {
                tbl = neigh_tables[t];

                if (!tbl)
                        continue;
                if (t < s_t || (family && tbl->family != family))
                        continue;
                /* starting a new table: clear the per-table resume state */
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (proxy)
                        err = pneigh_dump_table(tbl, skb, cb);
                else
                        err = neigh_dump_table(tbl, skb, cb);
                if (err < 0)
                        break;
        }

        cb->args[0] = t;
        return skb->len;
}
2484
2485 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2486 {
2487         int chain;
2488         struct neigh_hash_table *nht;
2489
2490         rcu_read_lock_bh();
2491         nht = rcu_dereference_bh(tbl->nht);
2492
2493         read_lock(&tbl->lock); /* avoid resizes */
2494         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2495                 struct neighbour *n;
2496
2497                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2498                      n != NULL;
2499                      n = rcu_dereference_bh(n->next))
2500                         cb(n, cookie);
2501         }
2502         read_unlock(&tbl->lock);
2503         rcu_read_unlock_bh();
2504 }
2505 EXPORT_SYMBOL(neigh_for_each);
2506
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and invoke @cb on each neighbour; when @cb
 * returns non-zero the entry is unlinked from its chain and marked dead
 * under n->lock, then handed to neigh_cleanup_and_release() after the
 * lock is dropped.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* unlink: link *np past n; np stays put so the
                                 * successor is examined next
                                 */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2541
/* Transmit @skb to @addr on @dev using neighbour table @index: look up
 * (or create) the neighbour entry and call its output method.  For
 * NEIGH_LINK_TABLE no resolution is needed; the hard header is built
 * directly and the skb queued.  @skb is freed here on the failure paths
 * that reach out_kfree_skb.
 * NOTE(review): when neigh_tables[index] is NULL the skb is neither sent
 * nor freed here - confirm that callers handle that return path.
 */
int neigh_xmit(int index, struct net_device *dev,
               const void *addr, struct sk_buff *skb)
{
        int err = -EAFNOSUPPORT;
        if (likely(index < NEIGH_NR_TABLES)) {
                struct neigh_table *tbl;
                struct neighbour *neigh;

                tbl = neigh_tables[index];
                if (!tbl)
                        goto out;
                rcu_read_lock_bh();
                if (index == NEIGH_ARP_TABLE) {
                        /* ARP keys are 32-bit IPv4 addresses */
                        u32 key = *((u32 *)addr);

                        neigh = __ipv4_neigh_lookup_noref(dev, key);
                } else {
                        neigh = __neigh_lookup_noref(tbl, addr, dev);
                }
                if (!neigh)
                        neigh = __neigh_create(tbl, addr, dev, false);
                err = PTR_ERR(neigh);
                if (IS_ERR(neigh)) {
                        rcu_read_unlock_bh();
                        goto out_kfree_skb;
                }
                err = neigh->output(neigh, skb);
                rcu_read_unlock_bh();
        }
        else if (index == NEIGH_LINK_TABLE) {
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      addr, NULL, skb->len);
                if (err < 0)
                        goto out_kfree_skb;
                err = dev_queue_xmit(skb);
        }
out:
        return err;
out_kfree_skb:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2585
2586 #ifdef CONFIG_PROC_FS
2587
/* Return the first neighbour visible to this seq walk: it must belong to
 * the netns of @seq, pass the optional per-protocol sub-iterator, and -
 * with NEIGH_SEQ_SKIP_NOARP - have a state other than NUD_NOARP.  The
 * bucket reached is saved in state->bucket.  Runs under the
 * rcu_read_lock_bh() taken in neigh_seq_start().
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        int bucket = state->bucket;

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                loff_t fakep = 0;
                                void *v;

                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
2626
/* Advance the neighbour seq walk past @n to the next entry that passes
 * the same filters as neigh_get_first(), moving on to later hash buckets
 * as chains are exhausted.  When @pos is non-NULL it is decremented for
 * each entry returned, so callers can skip forward to a target position.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        /* let the per-protocol sub-iterator step within @n first */
        if (state->neigh_sub_iter) {
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2674
2675 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2676 {
2677         struct neighbour *n = neigh_get_first(seq);
2678
2679         if (n) {
2680                 --(*pos);
2681                 while (*pos) {
2682                         n = neigh_get_next(seq, n, pos);
2683                         if (!n)
2684                                 break;
2685                 }
2686         }
2687         return *pos ? NULL : n;
2688 }
2689
2690 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2691 {
2692         struct neigh_seq_state *state = seq->private;
2693         struct net *net = seq_file_net(seq);
2694         struct neigh_table *tbl = state->tbl;
2695         struct pneigh_entry *pn = NULL;
2696         int bucket = state->bucket;
2697
2698         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2699         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2700                 pn = tbl->phash_buckets[bucket];
2701                 while (pn && !net_eq(pneigh_net(pn), net))
2702                         pn = pn->next;
2703                 if (pn)
2704                         break;
2705         }
2706         state->bucket = bucket;
2707
2708         return pn;
2709 }
2710
2711 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2712                                             struct pneigh_entry *pn,
2713                                             loff_t *pos)
2714 {
2715         struct neigh_seq_state *state = seq->private;
2716         struct net *net = seq_file_net(seq);
2717         struct neigh_table *tbl = state->tbl;
2718
2719         do {
2720                 pn = pn->next;
2721         } while (pn && !net_eq(pneigh_net(pn), net));
2722
2723         while (!pn) {
2724                 if (++state->bucket > PNEIGH_HASHMASK)
2725                         break;
2726                 pn = tbl->phash_buckets[state->bucket];
2727                 while (pn && !net_eq(pneigh_net(pn), net))
2728                         pn = pn->next;
2729                 if (pn)
2730                         break;
2731         }
2732
2733         if (pn && pos)
2734                 --(*pos);
2735
2736         return pn;
2737 }
2738
2739 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2740 {
2741         struct pneigh_entry *pn = pneigh_get_first(seq);
2742
2743         if (pn) {
2744                 --(*pos);
2745                 while (*pos) {
2746                         pn = pneigh_get_next(seq, pn, pos);
2747                         if (!pn)
2748                                 break;
2749                 }
2750         }
2751         return *pos ? NULL : pn;
2752 }
2753
2754 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2755 {
2756         struct neigh_seq_state *state = seq->private;
2757         void *rc;
2758         loff_t idxpos = *pos;
2759
2760         rc = neigh_get_idx(seq, &idxpos);
2761         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2762                 rc = pneigh_get_idx(seq, &idxpos);
2763
2764         return rc;
2765 }
2766
/* seq_file ->start for neighbour listings (e.g. /proc/net/arp): set up
 * the walk state, then take rcu_read_lock_bh() and tbl->lock, both of
 * which are released in neigh_seq_stop().  Returns SEQ_START_TOKEN for a
 * fresh read (*pos == 0), otherwise the *pos'th entry of the walk.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(tbl->lock)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);
        read_lock(&tbl->lock);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2784
/* seq_file ->next: advance within the neighbour phase, fall through to
 * the first proxy entry once neighbours are exhausted (unless
 * NEIGH_SEQ_NEIGH_ONLY), and continue through proxy entries once the
 * NEIGH_SEQ_IS_PNEIGH flag has been set by pneigh_get_first().
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2811
/* seq_file ->stop: drop the table lock and RCU read lock taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(tbl->lock)
        __releases(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;
        struct neigh_table *tbl = state->tbl;

        read_unlock(&tbl->lock);
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2823
2824 /* statistics via seq_file */
2825
/* seq_file ->start for the per-table statistics file: *pos == 0 yields
 * SEQ_START_TOKEN (the header line); afterwards *pos - 1 is the CPU to
 * resume from, and only possible CPUs are visited.  *pos is left at
 * cpu + 1 so the next call resumes after the CPU just returned.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct neigh_table *tbl = seq->private;
        int cpu;

        if (*pos == 0)
                return SEQ_START_TOKEN;

        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return per_cpu_ptr(tbl->stats, cpu);
        }
        return NULL;
}
2842
/* seq_file ->next: return the stats of the next possible CPU at or after
 * *pos, updating *pos to cpu + 1; bump *pos even at the end so the
 * iterator position keeps advancing.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_table *tbl = seq->private;
        int cpu;

        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
                if (!cpu_possible(cpu))
                        continue;
                *pos = cpu+1;
                return per_cpu_ptr(tbl->stats, cpu);
        }
        (*pos)++;
        return NULL;
}
2857
/* seq_file ->stop: nothing to release, ->start takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2862
/* Emit one line of the statistics file: the column header for
 * SEQ_START_TOKEN, otherwise the counters of the per-cpu record @v in
 * hex.  The leading "entries" column is table-global, not per-cpu.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = seq->private;
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }

        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards,
                   st->table_fulls
                   );

        return 0;
}
2897
/* seq_file operations for the per-table statistics proc files. */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2904
2905 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2906 {
2907         int ret = seq_open(file, &neigh_stat_seq_ops);
2908
2909         if (!ret) {
2910                 struct seq_file *sf = file->private_data;
2911                 sf->private = PDE_DATA(inode);
2912         }
2913         return ret;
2914 };
2915
/* File operations for the read-only statistics proc files. */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2923
2924 #endif /* CONFIG_PROC_FS */
2925
2926 static inline size_t neigh_nlmsg_size(void)
2927 {
2928         return NLMSG_ALIGN(sizeof(struct ndmsg))
2929                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2930                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2931                + nla_total_size(sizeof(struct nda_cacheinfo))
2932                + nla_total_size(4); /* NDA_PROBES */
2933 }
2934
/* Broadcast a neighbour event of @type/@flags for @n to the
 * RTNLGRP_NEIGH netlink group; on allocation or fill failure the error
 * is reported to listeners via rtnl_set_sk_err().
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
                           u32 pid)
{
        struct net *net = dev_net(n->dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
                goto errout;

        err = neigh_fill_info(skb, n, pid, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2959
/* Ask user space to resolve @n by emitting an RTM_GETNEIGH request
 * (NLM_F_REQUEST) to the RTNLGRP_NEIGH group.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2965
2966 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the sysctl handlers below. */
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2970
/* sysctl handler for the legacy "unres_qlen" knob: the stored value
 * (shared with unres_qlen_bytes) is in bytes while user space sees a
 * packet count, so convert with SKB_TRUESIZE(ETH_FRAME_LEN) on both
 * read and write; writes are clamped to [0, unres_qlen_max].
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int size, ret;
        struct ctl_table tmp = *ctl;

        tmp.extra1 = &zero;
        tmp.extra2 = &unres_qlen_max;
        tmp.data = &size;

        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

        if (write && !ret)
                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
        return ret;
}
2988
2989 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2990                                                    int family)
2991 {
2992         switch (family) {
2993         case AF_INET:
2994                 return __in_dev_arp_parms_get_rcu(dev);
2995         case AF_INET6:
2996                 return __in6_dev_nd_parms_get_rcu(dev);
2997         }
2998         return NULL;
2999 }
3000
/* Propagate the default value of parameter @index from @p to every
 * device in @net whose own copy has not been explicitly written (its
 * data_state bit is clear).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
                                  int index)
{
        struct net_device *dev;
        int family = neigh_parms_family(p);

        rcu_read_lock();
        for_each_netdev_rcu(net, dev) {
                struct neigh_parms *dst_p =
                                neigh_get_dev_parms_rcu(dev, family);

                if (dst_p && !test_bit(index, dst_p->data_state))
                        dst_p->data[index] = p->data[index];
        }
        rcu_read_unlock();
}
3017
/* Common post-write bookkeeping for the neigh sysctl handlers: mark the
 * parameter as explicitly set, notify netevent listeners for
 * delay_first_probe_time changes, and propagate writes to the default
 * table (dev == NULL) down to all devices.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
        struct net_device *dev = ctl->extra1;
        struct neigh_parms *p = ctl->extra2;
        struct net *net = neigh_parms_net(p);
        int index = (int *) ctl->data - p->data;

        if (!write)
                return;

        set_bit(index, p->data_state);
        if (index == NEIGH_VAR_DELAY_PROBE_TIME)
                call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
        if (!dev) /* NULL dev means this is default value */
                neigh_copy_dflt_parms(net, p, index);
}
3034
3035 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3036                                            void __user *buffer,
3037                                            size_t *lenp, loff_t *ppos)
3038 {
3039         struct ctl_table tmp = *ctl;
3040         int ret;
3041
3042         tmp.extra1 = &zero;
3043         tmp.extra2 = &int_max;
3044
3045         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3046         neigh_proc_update(ctl, write);
3047         return ret;
3048 }
3049
/* Plain proc_dointvec plus the common neigh parameter bookkeeping. */
int neigh_proc_dointvec(struct ctl_table *ctl, int write,
                        void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

        neigh_proc_update(ctl, write);
        return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);
3059
/* Seconds-to-jiffies variant plus the common neigh bookkeeping. */
int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
{
        int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

        neigh_proc_update(ctl, write);
        return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3070
/* USER_HZ-ticks-to-jiffies variant plus the common neigh bookkeeping. */
static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
                                              void __user *buffer,
                                              size_t *lenp, loff_t *ppos)
{
        int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

        neigh_proc_update(ctl, write);
        return ret;
}
3080
/* Milliseconds-to-jiffies variant plus the common neigh bookkeeping. */
int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
                                   void __user *buffer,
                                   size_t *lenp, loff_t *ppos)
{
        int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

        neigh_proc_update(ctl, write);
        return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3091
/* Legacy packet-count unres_qlen handler plus the common bookkeeping. */
static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
                                          void __user *buffer,
                                          size_t *lenp, loff_t *ppos)
{
        int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

        neigh_proc_update(ctl, write);
        return ret;
}
3101
/* Handler for base_reachable_time(_ms): dispatch on the sysctl name to
 * the seconds or milliseconds converter, then recompute the randomized
 * reachable_time so the new base takes effect immediately.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
                                          void __user *buffer,
                                          size_t *lenp, loff_t *ppos)
{
        struct neigh_parms *p = ctl->extra2;
        int ret;

        if (strcmp(ctl->procname, "base_reachable_time") == 0)
                ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
        else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
                ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
        else
                ret = -1;

        if (write && ret == 0) {
                /* update reachable_time as well, otherwise, the change will
                 * only be effective after the next time neigh_periodic_work
                 * decides to recompute it
                 */
                p->reachable_time =
                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }
        return ret;
}
3126
/* Offset of parameter @index within struct neigh_parms; fixed up by
 * adding the real parms pointer in neigh_sysctl_register().
 */
#define NEIGH_PARMS_DATA_OFFSET(index)  \
        (&((struct neigh_parms *) 0)->data[index])

/* Build the ctl_table entry for NEIGH_VAR_<attr>, backed by the storage
 * of NEIGH_VAR_<data_attr> and dispatched to handler @proc.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
        [NEIGH_VAR_ ## attr] = { \
                .procname       = name, \
                .data           = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
        }

/* Convenience wrappers: one per value encoding/handler. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "REUSED" variants expose a second sysctl name over another
 * attribute's storage (e.g. unres_qlen over unres_qlen_bytes).
 */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3156
/* Template sysctl table, copied by neigh_sysctl_register() for each
 * device/table; the per-parms entries have their .data fixed up there,
 * and the trailing GC entries are used only for the "default" tree.
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
                NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                {},
        },
};
3211
/* Register the neighbour sysctl tree for @p under
 * net/<ipv4|ipv6>/neigh/<dev->name|default>/.  When @dev is NULL this is
 * the per-table "default" tree, which additionally exposes the GC knobs.
 * @handler, when given, overrides the retrans/reachable time handlers.
 * Returns 0 on success or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          proc_handler *handler)
{
        int i;
        struct neigh_sysctl_table *t;
        const char *dev_name_source;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
        char *p_name;

        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        /* Rebase the template's parms-relative offsets onto @p and stash
         * dev/parms for neigh_proc_update()
         */
        for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
                t->neigh_vars[i].data += (long) p;
                t->neigh_vars[i].extra1 = dev;
                t->neigh_vars[i].extra2 = p;
        }

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                struct neigh_table *tbl = p->tbl;
                dev_name_source = "default";
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
        }

        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
        } else {
                /* Those handlers will update p->reachable_time after
                 * base_reachable_time(_ms) is set to ensure the new timer starts being
                 * applied after the next neighbour update instead of waiting for
                 * neigh_periodic_work to update its value (can be multiple minutes)
                 * So any handler that replaces them should do this as well
                 */
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
                        neigh_proc_base_reachable_time;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
                        neigh_proc_base_reachable_time;
        }

        /* Don't export sysctls to unprivileged users */
        if (neigh_parms_net(p)->user_ns != &init_user_ns)
                t->neigh_vars[0].procname = NULL;

        switch (neigh_parms_family(p)) {
        case AF_INET:
              p_name = "ipv4";
              break;
        case AF_INET6:
              p_name = "ipv6";
              break;
        default:
              BUG();
        }

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3300
3301 void neigh_sysctl_unregister(struct neigh_parms *p)
3302 {
3303         if (p->sysctl_table) {
3304                 struct neigh_sysctl_table *t = p->sysctl_table;
3305                 p->sysctl_table = NULL;
3306                 unregister_net_sysctl_table(t->sysctl_header);
3307                 kfree(t);
3308         }
3309 }
3310 EXPORT_SYMBOL(neigh_sysctl_unregister);
3311
3312 #endif  /* CONFIG_SYSCTL */
3313
/* Register the rtnetlink message handlers for neighbour (ARP/NDISC cache)
 * and neighbour-table management.  Runs once at boot via subsys_initcall so
 * the handlers exist before any protocol family starts using them.
 * Always returns 0; rtnl_register() reports its own failures internally.
 */
static int __init neigh_init(void)
{
	/* Per-entry operations: add/delete take a doit handler, get is
	 * dump-only (neigh_dump_info walks the whole cache).
	 */
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	/* Whole-table operations: dump table parameters / set tunables. */
	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);
3328