GNU Linux-libre 4.19.207-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
#define DEBUG
#define NEIGH_DEBUG 1
/*
 * neigh_dbg - emit a pr_debug() message when @level does not exceed
 * NEIGH_DEBUG.
 *
 * @level is parenthesised so that an expression argument (for example
 * "a & b") is evaluated as a whole before being compared; relational
 * operators bind tighter than bitwise ones.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
53
/*
 * Mask applied to a proxy-neighbour hash value; proxy bucket indices
 * therefore run from 0 to PNEIGH_HASHMASK inclusive.
 */
#define PNEIGH_HASHMASK         0xF
55
56 static void neigh_timer_handler(struct timer_list *t);
57 static void __neigh_notify(struct neighbour *n, int type, int flags,
58                            u32 pid);
59 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
60 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
61                                     struct net_device *dev);
62
63 #ifdef CONFIG_PROC_FS
64 static const struct seq_operations neigh_stat_seq_ops;
65 #endif
66
/*
   Neighbour hash table buckets are protected by the rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be done under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if a backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by a reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks into the neighbour tables.
 */
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
107         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
108         neigh_release(neigh);
109 }
110
/*
 * Pick a random reachable time from the interval
 * (1/2)*base ... (3/2)*base; a @base of 0 yields 0.
 *
 * This corresponds to the default IPv6 settings and is deliberately
 * not overridable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (prandom_u32() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
124 static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
125                       struct neighbour __rcu **np, struct neigh_table *tbl)
126 {
127         bool retval = false;
128
129         write_lock(&n->lock);
130         if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
131             !(n->flags & flags)) {
132                 struct neighbour *neigh;
133
134                 neigh = rcu_dereference_protected(n->next,
135                                                   lockdep_is_held(&tbl->lock));
136                 rcu_assign_pointer(*np, neigh);
137                 n->dead = 1;
138                 retval = true;
139         }
140         write_unlock(&n->lock);
141         if (retval)
142                 neigh_cleanup_and_release(n);
143         return retval;
144 }
145
146 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
147 {
148         struct neigh_hash_table *nht;
149         void *pkey = ndel->primary_key;
150         u32 hash_val;
151         struct neighbour *n;
152         struct neighbour __rcu **np;
153
154         nht = rcu_dereference_protected(tbl->nht,
155                                         lockdep_is_held(&tbl->lock));
156         hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
157         hash_val = hash_val >> (32 - nht->hash_shift);
158
159         np = &nht->hash_buckets[hash_val];
160         while ((n = rcu_dereference_protected(*np,
161                                               lockdep_is_held(&tbl->lock)))) {
162                 if (n == ndel)
163                         return neigh_del(n, 0, 0, np, tbl);
164                 np = &n->next;
165         }
166         return false;
167 }
168
169 static int neigh_forced_gc(struct neigh_table *tbl)
170 {
171         int shrunk = 0;
172         int i;
173         struct neigh_hash_table *nht;
174
175         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
176
177         write_lock_bh(&tbl->lock);
178         nht = rcu_dereference_protected(tbl->nht,
179                                         lockdep_is_held(&tbl->lock));
180         for (i = 0; i < (1 << nht->hash_shift); i++) {
181                 struct neighbour *n;
182                 struct neighbour __rcu **np;
183
184                 np = &nht->hash_buckets[i];
185                 while ((n = rcu_dereference_protected(*np,
186                                         lockdep_is_held(&tbl->lock))) != NULL) {
187                         /* Neighbour record may be discarded if:
188                          * - nobody refers to it.
189                          * - it is not permanent
190                          */
191                         if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
192                                       tbl)) {
193                                 shrunk = 1;
194                                 continue;
195                         }
196                         np = &n->next;
197                 }
198         }
199
200         tbl->last_flush = jiffies;
201
202         write_unlock_bh(&tbl->lock);
203
204         return shrunk;
205 }
206
207 static void neigh_add_timer(struct neighbour *n, unsigned long when)
208 {
209         neigh_hold(n);
210         if (unlikely(mod_timer(&n->timer, when))) {
211                 printk("NEIGH: BUG, double timer add, state is %x\n",
212                        n->nud_state);
213                 dump_stack();
214         }
215 }
216
217 static int neigh_del_timer(struct neighbour *n)
218 {
219         if ((n->nud_state & NUD_IN_TIMER) &&
220             del_timer(&n->timer)) {
221                 neigh_release(n);
222                 return 1;
223         }
224         return 0;
225 }
226
227 static void pneigh_queue_purge(struct sk_buff_head *list)
228 {
229         struct sk_buff *skb;
230
231         while ((skb = skb_dequeue(list)) != NULL) {
232                 dev_put(skb->dev);
233                 kfree_skb(skb);
234         }
235 }
236
237 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
238 {
239         int i;
240         struct neigh_hash_table *nht;
241
242         nht = rcu_dereference_protected(tbl->nht,
243                                         lockdep_is_held(&tbl->lock));
244
245         for (i = 0; i < (1 << nht->hash_shift); i++) {
246                 struct neighbour *n;
247                 struct neighbour __rcu **np = &nht->hash_buckets[i];
248
249                 while ((n = rcu_dereference_protected(*np,
250                                         lockdep_is_held(&tbl->lock))) != NULL) {
251                         if (dev && n->dev != dev) {
252                                 np = &n->next;
253                                 continue;
254                         }
255                         rcu_assign_pointer(*np,
256                                    rcu_dereference_protected(n->next,
257                                                 lockdep_is_held(&tbl->lock)));
258                         write_lock(&n->lock);
259                         neigh_del_timer(n);
260                         n->dead = 1;
261
262                         if (refcount_read(&n->refcnt) != 1) {
263                                 /* The most unpleasant situation.
264                                    We must destroy neighbour entry,
265                                    but someone still uses it.
266
267                                    The destroy will be delayed until
268                                    the last user releases us, but
269                                    we must kill timers etc. and move
270                                    it to safe state.
271                                  */
272                                 __skb_queue_purge(&n->arp_queue);
273                                 n->arp_queue_len_bytes = 0;
274                                 n->output = neigh_blackhole;
275                                 if (n->nud_state & NUD_VALID)
276                                         n->nud_state = NUD_NOARP;
277                                 else
278                                         n->nud_state = NUD_NONE;
279                                 neigh_dbg(2, "neigh %p is stray\n", n);
280                         }
281                         write_unlock(&n->lock);
282                         neigh_cleanup_and_release(n);
283                 }
284         }
285 }
286
287 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
288 {
289         write_lock_bh(&tbl->lock);
290         neigh_flush_dev(tbl, dev);
291         write_unlock_bh(&tbl->lock);
292 }
293 EXPORT_SYMBOL(neigh_changeaddr);
294
295 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
296 {
297         write_lock_bh(&tbl->lock);
298         neigh_flush_dev(tbl, dev);
299         pneigh_ifdown_and_unlock(tbl, dev);
300
301         del_timer_sync(&tbl->proxy_timer);
302         pneigh_queue_purge(&tbl->proxy_queue);
303         return 0;
304 }
305 EXPORT_SYMBOL(neigh_ifdown);
306
307 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
308 {
309         struct neighbour *n = NULL;
310         unsigned long now = jiffies;
311         int entries;
312
313         entries = atomic_inc_return(&tbl->entries) - 1;
314         if (entries >= tbl->gc_thresh3 ||
315             (entries >= tbl->gc_thresh2 &&
316              time_after(now, tbl->last_flush + 5 * HZ))) {
317                 if (!neigh_forced_gc(tbl) &&
318                     entries >= tbl->gc_thresh3) {
319                         net_info_ratelimited("%s: neighbor table overflow!\n",
320                                              tbl->id);
321                         NEIGH_CACHE_STAT_INC(tbl, table_fulls);
322                         goto out_entries;
323                 }
324         }
325
326         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
327         if (!n)
328                 goto out_entries;
329
330         __skb_queue_head_init(&n->arp_queue);
331         rwlock_init(&n->lock);
332         seqlock_init(&n->ha_lock);
333         n->updated        = n->used = now;
334         n->nud_state      = NUD_NONE;
335         n->output         = neigh_blackhole;
336         seqlock_init(&n->hh.hh_lock);
337         n->parms          = neigh_parms_clone(&tbl->parms);
338         timer_setup(&n->timer, neigh_timer_handler, 0);
339
340         NEIGH_CACHE_STAT_INC(tbl, allocs);
341         n->tbl            = tbl;
342         refcount_set(&n->refcnt, 1);
343         n->dead           = 1;
344 out:
345         return n;
346
347 out_entries:
348         atomic_dec(&tbl->entries);
349         goto out;
350 }
351
352 static void neigh_get_hash_rnd(u32 *x)
353 {
354         *x = get_random_u32() | 1;
355 }
356
357 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
358 {
359         size_t size = (1 << shift) * sizeof(struct neighbour *);
360         struct neigh_hash_table *ret;
361         struct neighbour __rcu **buckets;
362         int i;
363
364         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
365         if (!ret)
366                 return NULL;
367         if (size <= PAGE_SIZE) {
368                 buckets = kzalloc(size, GFP_ATOMIC);
369         } else {
370                 buckets = (struct neighbour __rcu **)
371                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
372                                            get_order(size));
373                 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
374         }
375         if (!buckets) {
376                 kfree(ret);
377                 return NULL;
378         }
379         ret->hash_buckets = buckets;
380         ret->hash_shift = shift;
381         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
382                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
383         return ret;
384 }
385
386 static void neigh_hash_free_rcu(struct rcu_head *head)
387 {
388         struct neigh_hash_table *nht = container_of(head,
389                                                     struct neigh_hash_table,
390                                                     rcu);
391         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
392         struct neighbour __rcu **buckets = nht->hash_buckets;
393
394         if (size <= PAGE_SIZE) {
395                 kfree(buckets);
396         } else {
397                 kmemleak_free(buckets);
398                 free_pages((unsigned long)buckets, get_order(size));
399         }
400         kfree(nht);
401 }
402
403 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
404                                                 unsigned long new_shift)
405 {
406         unsigned int i, hash;
407         struct neigh_hash_table *new_nht, *old_nht;
408
409         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
410
411         old_nht = rcu_dereference_protected(tbl->nht,
412                                             lockdep_is_held(&tbl->lock));
413         new_nht = neigh_hash_alloc(new_shift);
414         if (!new_nht)
415                 return old_nht;
416
417         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
418                 struct neighbour *n, *next;
419
420                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
421                                                    lockdep_is_held(&tbl->lock));
422                      n != NULL;
423                      n = next) {
424                         hash = tbl->hash(n->primary_key, n->dev,
425                                          new_nht->hash_rnd);
426
427                         hash >>= (32 - new_nht->hash_shift);
428                         next = rcu_dereference_protected(n->next,
429                                                 lockdep_is_held(&tbl->lock));
430
431                         rcu_assign_pointer(n->next,
432                                            rcu_dereference_protected(
433                                                 new_nht->hash_buckets[hash],
434                                                 lockdep_is_held(&tbl->lock)));
435                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
436                 }
437         }
438
439         rcu_assign_pointer(tbl->nht, new_nht);
440         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
441         return new_nht;
442 }
443
444 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
445                                struct net_device *dev)
446 {
447         struct neighbour *n;
448
449         NEIGH_CACHE_STAT_INC(tbl, lookups);
450
451         rcu_read_lock_bh();
452         n = __neigh_lookup_noref(tbl, pkey, dev);
453         if (n) {
454                 if (!refcount_inc_not_zero(&n->refcnt))
455                         n = NULL;
456                 NEIGH_CACHE_STAT_INC(tbl, hits);
457         }
458
459         rcu_read_unlock_bh();
460         return n;
461 }
462 EXPORT_SYMBOL(neigh_lookup);
463
464 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
465                                      const void *pkey)
466 {
467         struct neighbour *n;
468         unsigned int key_len = tbl->key_len;
469         u32 hash_val;
470         struct neigh_hash_table *nht;
471
472         NEIGH_CACHE_STAT_INC(tbl, lookups);
473
474         rcu_read_lock_bh();
475         nht = rcu_dereference_bh(tbl->nht);
476         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
477
478         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
479              n != NULL;
480              n = rcu_dereference_bh(n->next)) {
481                 if (!memcmp(n->primary_key, pkey, key_len) &&
482                     net_eq(dev_net(n->dev), net)) {
483                         if (!refcount_inc_not_zero(&n->refcnt))
484                                 n = NULL;
485                         NEIGH_CACHE_STAT_INC(tbl, hits);
486                         break;
487                 }
488         }
489
490         rcu_read_unlock_bh();
491         return n;
492 }
493 EXPORT_SYMBOL(neigh_lookup_nodev);
494
495 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
496                                  struct net_device *dev, bool want_ref)
497 {
498         u32 hash_val;
499         unsigned int key_len = tbl->key_len;
500         int error;
501         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
502         struct neigh_hash_table *nht;
503
504         if (!n) {
505                 rc = ERR_PTR(-ENOBUFS);
506                 goto out;
507         }
508
509         memcpy(n->primary_key, pkey, key_len);
510         n->dev = dev;
511         dev_hold(dev);
512
513         /* Protocol specific setup. */
514         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
515                 rc = ERR_PTR(error);
516                 goto out_neigh_release;
517         }
518
519         if (dev->netdev_ops->ndo_neigh_construct) {
520                 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
521                 if (error < 0) {
522                         rc = ERR_PTR(error);
523                         goto out_neigh_release;
524                 }
525         }
526
527         /* Device specific setup. */
528         if (n->parms->neigh_setup &&
529             (error = n->parms->neigh_setup(n)) < 0) {
530                 rc = ERR_PTR(error);
531                 goto out_neigh_release;
532         }
533
534         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
535
536         write_lock_bh(&tbl->lock);
537         nht = rcu_dereference_protected(tbl->nht,
538                                         lockdep_is_held(&tbl->lock));
539
540         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
541                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
542
543         hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
544
545         if (n->parms->dead) {
546                 rc = ERR_PTR(-EINVAL);
547                 goto out_tbl_unlock;
548         }
549
550         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
551                                             lockdep_is_held(&tbl->lock));
552              n1 != NULL;
553              n1 = rcu_dereference_protected(n1->next,
554                         lockdep_is_held(&tbl->lock))) {
555                 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
556                         if (want_ref)
557                                 neigh_hold(n1);
558                         rc = n1;
559                         goto out_tbl_unlock;
560                 }
561         }
562
563         n->dead = 0;
564         if (want_ref)
565                 neigh_hold(n);
566         rcu_assign_pointer(n->next,
567                            rcu_dereference_protected(nht->hash_buckets[hash_val],
568                                                      lockdep_is_held(&tbl->lock)));
569         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
570         write_unlock_bh(&tbl->lock);
571         neigh_dbg(2, "neigh %p is created\n", n);
572         rc = n;
573 out:
574         return rc;
575 out_tbl_unlock:
576         write_unlock_bh(&tbl->lock);
577 out_neigh_release:
578         neigh_release(n);
579         goto out;
580 }
581 EXPORT_SYMBOL(__neigh_create);
582
583 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
584 {
585         u32 hash_val = *(u32 *)(pkey + key_len - 4);
586         hash_val ^= (hash_val >> 16);
587         hash_val ^= hash_val >> 8;
588         hash_val ^= hash_val >> 4;
589         hash_val &= PNEIGH_HASHMASK;
590         return hash_val;
591 }
592
593 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
594                                               struct net *net,
595                                               const void *pkey,
596                                               unsigned int key_len,
597                                               struct net_device *dev)
598 {
599         while (n) {
600                 if (!memcmp(n->key, pkey, key_len) &&
601                     net_eq(pneigh_net(n), net) &&
602                     (n->dev == dev || !n->dev))
603                         return n;
604                 n = n->next;
605         }
606         return NULL;
607 }
608
609 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
610                 struct net *net, const void *pkey, struct net_device *dev)
611 {
612         unsigned int key_len = tbl->key_len;
613         u32 hash_val = pneigh_hash(pkey, key_len);
614
615         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
616                                  net, pkey, key_len, dev);
617 }
618 EXPORT_SYMBOL_GPL(__pneigh_lookup);
619
620 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
621                                     struct net *net, const void *pkey,
622                                     struct net_device *dev, int creat)
623 {
624         struct pneigh_entry *n;
625         unsigned int key_len = tbl->key_len;
626         u32 hash_val = pneigh_hash(pkey, key_len);
627
628         read_lock_bh(&tbl->lock);
629         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
630                               net, pkey, key_len, dev);
631         read_unlock_bh(&tbl->lock);
632
633         if (n || !creat)
634                 goto out;
635
636         ASSERT_RTNL();
637
638         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
639         if (!n)
640                 goto out;
641
642         write_pnet(&n->net, net);
643         memcpy(n->key, pkey, key_len);
644         n->dev = dev;
645         if (dev)
646                 dev_hold(dev);
647
648         if (tbl->pconstructor && tbl->pconstructor(n)) {
649                 if (dev)
650                         dev_put(dev);
651                 kfree(n);
652                 n = NULL;
653                 goto out;
654         }
655
656         write_lock_bh(&tbl->lock);
657         n->next = tbl->phash_buckets[hash_val];
658         tbl->phash_buckets[hash_val] = n;
659         write_unlock_bh(&tbl->lock);
660 out:
661         return n;
662 }
663 EXPORT_SYMBOL(pneigh_lookup);
664
665
666 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
667                   struct net_device *dev)
668 {
669         struct pneigh_entry *n, **np;
670         unsigned int key_len = tbl->key_len;
671         u32 hash_val = pneigh_hash(pkey, key_len);
672
673         write_lock_bh(&tbl->lock);
674         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
675              np = &n->next) {
676                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
677                     net_eq(pneigh_net(n), net)) {
678                         *np = n->next;
679                         write_unlock_bh(&tbl->lock);
680                         if (tbl->pdestructor)
681                                 tbl->pdestructor(n);
682                         if (n->dev)
683                                 dev_put(n->dev);
684                         kfree(n);
685                         return 0;
686                 }
687         }
688         write_unlock_bh(&tbl->lock);
689         return -ENOENT;
690 }
691
692 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
693                                     struct net_device *dev)
694 {
695         struct pneigh_entry *n, **np, *freelist = NULL;
696         u32 h;
697
698         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
699                 np = &tbl->phash_buckets[h];
700                 while ((n = *np) != NULL) {
701                         if (!dev || n->dev == dev) {
702                                 *np = n->next;
703                                 n->next = freelist;
704                                 freelist = n;
705                                 continue;
706                         }
707                         np = &n->next;
708                 }
709         }
710         write_unlock_bh(&tbl->lock);
711         while ((n = freelist)) {
712                 freelist = n->next;
713                 n->next = NULL;
714                 if (tbl->pdestructor)
715                         tbl->pdestructor(n);
716                 if (n->dev)
717                         dev_put(n->dev);
718                 kfree(n);
719         }
720         return -ENOENT;
721 }
722
723 static void neigh_parms_destroy(struct neigh_parms *parms);
724
725 static inline void neigh_parms_put(struct neigh_parms *parms)
726 {
727         if (refcount_dec_and_test(&parms->refcnt))
728                 neigh_parms_destroy(parms);
729 }
730
731 /*
732  *      neighbour must already be out of the table;
733  *
734  */
735 void neigh_destroy(struct neighbour *neigh)
736 {
737         struct net_device *dev = neigh->dev;
738
739         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
740
741         if (!neigh->dead) {
742                 pr_warn("Destroying alive neighbour %p\n", neigh);
743                 dump_stack();
744                 return;
745         }
746
747         if (neigh_del_timer(neigh))
748                 pr_warn("Impossible event\n");
749
750         write_lock_bh(&neigh->lock);
751         __skb_queue_purge(&neigh->arp_queue);
752         write_unlock_bh(&neigh->lock);
753         neigh->arp_queue_len_bytes = 0;
754
755         if (dev->netdev_ops->ndo_neigh_destroy)
756                 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
757
758         dev_put(dev);
759         neigh_parms_put(neigh->parms);
760
761         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
762
763         atomic_dec(&neigh->tbl->entries);
764         kfree_rcu(neigh, rcu);
765 }
766 EXPORT_SYMBOL(neigh_destroy);
767
768 /* Neighbour state is suspicious;
769    disable fast path.
770
771    Called with write_locked neigh.
772  */
773 static void neigh_suspect(struct neighbour *neigh)
774 {
775         neigh_dbg(2, "neigh %p is suspected\n", neigh);
776
777         neigh->output = neigh->ops->output;
778 }
779
780 /* Neighbour state is OK;
781    enable fast path.
782
783    Called with write_locked neigh.
784  */
785 static void neigh_connect(struct neighbour *neigh)
786 {
787         neigh_dbg(2, "neigh %p is connected\n", neigh);
788
789         neigh->output = neigh->ops->connected_output;
790 }
791
792 static void neigh_periodic_work(struct work_struct *work)
793 {
794         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
795         struct neighbour *n;
796         struct neighbour __rcu **np;
797         unsigned int i;
798         struct neigh_hash_table *nht;
799
800         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
801
802         write_lock_bh(&tbl->lock);
803         nht = rcu_dereference_protected(tbl->nht,
804                                         lockdep_is_held(&tbl->lock));
805
806         /*
807          *      periodically recompute ReachableTime from random function
808          */
809
810         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
811                 struct neigh_parms *p;
812                 tbl->last_rand = jiffies;
813                 list_for_each_entry(p, &tbl->parms_list, list)
814                         p->reachable_time =
815                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
816         }
817
818         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
819                 goto out;
820
821         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
822                 np = &nht->hash_buckets[i];
823
824                 while ((n = rcu_dereference_protected(*np,
825                                 lockdep_is_held(&tbl->lock))) != NULL) {
826                         unsigned int state;
827
828                         write_lock(&n->lock);
829
830                         state = n->nud_state;
831                         if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
832                             (n->flags & NTF_EXT_LEARNED)) {
833                                 write_unlock(&n->lock);
834                                 goto next_elt;
835                         }
836
837                         if (time_before(n->used, n->confirmed))
838                                 n->used = n->confirmed;
839
840                         if (refcount_read(&n->refcnt) == 1 &&
841                             (state == NUD_FAILED ||
842                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
843                                 *np = n->next;
844                                 n->dead = 1;
845                                 write_unlock(&n->lock);
846                                 neigh_cleanup_and_release(n);
847                                 continue;
848                         }
849                         write_unlock(&n->lock);
850
851 next_elt:
852                         np = &n->next;
853                 }
854                 /*
855                  * It's fine to release lock here, even if hash table
856                  * grows while we are preempted.
857                  */
858                 write_unlock_bh(&tbl->lock);
859                 cond_resched();
860                 write_lock_bh(&tbl->lock);
861                 nht = rcu_dereference_protected(tbl->nht,
862                                                 lockdep_is_held(&tbl->lock));
863         }
864 out:
865         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
866          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
867          * BASE_REACHABLE_TIME.
868          */
869         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
870                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
871         write_unlock_bh(&tbl->lock);
872 }
873
874 static __inline__ int neigh_max_probes(struct neighbour *n)
875 {
876         struct neigh_parms *p = n->parms;
877         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
878                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
879                 NEIGH_VAR(p, MCAST_PROBES));
880 }
881
882 static void neigh_invalidate(struct neighbour *neigh)
883         __releases(neigh->lock)
884         __acquires(neigh->lock)
885 {
886         struct sk_buff *skb;
887
888         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
889         neigh_dbg(2, "neigh %p is failed\n", neigh);
890         neigh->updated = jiffies;
891
892         /* It is very thin place. report_unreachable is very complicated
893            routine. Particularly, it can hit the same neighbour entry!
894
895            So that, we try to be accurate and avoid dead loop. --ANK
896          */
897         while (neigh->nud_state == NUD_FAILED &&
898                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
899                 write_unlock(&neigh->lock);
900                 neigh->ops->error_report(neigh, skb);
901                 write_lock(&neigh->lock);
902         }
903         __skb_queue_purge(&neigh->arp_queue);
904         neigh->arp_queue_len_bytes = 0;
905 }
906
907 static void neigh_probe(struct neighbour *neigh)
908         __releases(neigh->lock)
909 {
910         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
911         /* keep skb alive even if arp_queue overflows */
912         if (skb)
913                 skb = skb_clone(skb, GFP_ATOMIC);
914         write_unlock(&neigh->lock);
915         if (neigh->ops->solicit)
916                 neigh->ops->solicit(neigh, skb);
917         atomic_inc(&neigh->probes);
918         kfree_skb(skb);
919 }
920
/*
 * Called when a timer expires for a neighbour entry.
 *
 * Steps the NUD state of the entry that embeds @t:
 *   - NUD_REACHABLE stays as is, or becomes NUD_DELAY or NUD_STALE,
 *   - NUD_DELAY becomes NUD_REACHABLE or NUD_PROBE,
 *   - NUD_INCOMPLETE/NUD_PROBE become NUD_FAILED once the probe counter
 *     has reached neigh_max_probes().
 * While the resulting state is still in NUD_IN_TIMER the timer is re-armed,
 * and a probe is sent for NUD_INCOMPLETE/NUD_PROBE entries.  The handler
 * always ends by dropping one reference with neigh_release().
 */

static void neigh_timer_handler(struct timer_list *t)
{
        unsigned long now, next;
        struct neighbour *neigh = from_timer(neigh, t, timer);
        unsigned int state;
        int notify = 0;        /* set when neigh_update_notify() must be called */

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        /* The entry left the timer-driven states meanwhile: nothing to do. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Confirmed recently enough: wake up when that expires. */
                        neigh_dbg(2, "neigh %p is still alive\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used +
                                          NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        /* Not confirmed, but used within DELAY_PROBE_TIME. */
                        neigh_dbg(2, "neigh %p is delayed\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
                } else {
                        /* Neither confirmed nor recently used. */
                        neigh_dbg(2, "neigh %p is suspected\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed +
                                   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        /* A confirmation arrived during the delay window. */
                        neigh_dbg(2, "neigh %p is now reachable\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* No confirmation: start probing with a fresh counter. */
                        neigh_dbg(2, "neigh %p is probed\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        notify = 1;
                        next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
        }

        /*
         * Evaluated on neigh->nud_state, not on the 'state' snapshot, so an
         * entry that was moved to NUD_PROBE just above is checked as well.
         */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
                goto out;
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm closer than HZ/2 from now. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                /*
                 * A reference is taken when mod_timer() returns 0.
                 * NOTE(review): presumably this is the reference owned by
                 * the newly pending timer, balanced by the neigh_release()
                 * at the end of its next run - confirm against
                 * neigh_add_timer()/neigh_del_timer().
                 */
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() releases neigh->lock itself. */
                neigh_probe(neigh);
        } else {
                /* Shared unlock for the non-probing path and the early exits. */
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh, 0);

        neigh_release(neigh);
}
1008
/*
 * Slow-path state handling for sending @skb via @neigh.
 *
 * Returns 0 when the entry is in a NUD_CONNECTED, NUD_DELAY or NUD_PROBE
 * state, or was NUD_STALE (a live STALE entry is moved to NUD_DELAY first);
 * @skb is left untouched in that case.
 *
 * Returns 1 when:
 *   - the entry is, or has just been put, in NUD_INCOMPLETE; @skb (if not
 *     NULL) has then been appended to neigh->arp_queue, or
 *   - resolution cannot be started because MCAST_PROBES + APP_PROBES is 0;
 *     the entry becomes NUD_FAILED and @skb is freed, or
 *   - the entry is dead and not NUD_STALE; @skb is freed.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;
        if (neigh->dead)
                goto out_dead;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
                    NEIGH_VAR(neigh->parms, APP_PROBES)) {
                        unsigned long next, now = jiffies;

                        /*
                         * The counter starts at UCAST_PROBES rather than 0,
                         * so with neigh_max_probes() as the limit only the
                         * multicast and application budget remains.
                         */
                        atomic_set(&neigh->probes,
                                   NEIGH_VAR(neigh->parms, UCAST_PROBES));
                        neigh_del_timer(neigh);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
                                         HZ/2);
                        neigh_add_timer(neigh, next);
                        /* Send the first probe right away, at the exit below. */
                        immediate_probe = true;
                } else {
                        /* No probe budget at all: fail immediately. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                neigh_dbg(2, "neigh %p is delayed\n", neigh);
                neigh_del_timer(neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh, jiffies +
                                NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /*
                         * Make room by dropping packets from the head of the
                         * queue until @skb fits within QUEUE_LEN_BYTES or the
                         * queue is empty.
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        /*
         * neigh_probe() drops neigh->lock with a plain write_unlock(), so
         * the _bh unlock is done in two steps here: release the lock one
         * way or the other, then re-enable bottom halves.
         */
        if (immediate_probe)
                neigh_probe(neigh);
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;

out_dead:
        /* A dead STALE entry still reports 0; its state is not changed. */
        if (neigh->nud_state & NUD_STALE)
                goto out_unlock_bh;
        write_unlock_bh(&neigh->lock);
        kfree_skb(skb);
        return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1088
1089 static void neigh_update_hhs(struct neighbour *neigh)
1090 {
1091         struct hh_cache *hh;
1092         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1093                 = NULL;
1094
1095         if (neigh->dev->header_ops)
1096                 update = neigh->dev->header_ops->cache_update;
1097
1098         if (update) {
1099                 hh = &neigh->hh;
1100                 if (READ_ONCE(hh->hh_len)) {
1101                         write_seqlock_bh(&hh->hh_lock);
1102                         update(hh, neigh->dev, neigh->ha);
1103                         write_sequnlock_bh(&hh->hh_lock);
1104                 }
1105         }
1106 }
1107
1108
1109
/* Generic update routine.
   -- lladdr is the new link-layer address, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE marks the existing "connected"
                                lladdr as suspect instead of overriding
                                it if it is different.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
                                to be a router.

   The caller MUST hold a reference on the entry.
 */
1128
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags, u32 nlmsg_pid)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        /*
         * Return value, as set below:
         *   -EPERM   non-administrative update of a NUD_NOARP/NUD_PERMANENT
         *            entry, or the entry is dead;
         *   -EINVAL  no lladdr supplied and the entry holds no valid one;
         *   0        otherwise (including updates that end up changing
         *            nothing).
         */
        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;
        if (neigh->dead)
                goto out;

        neigh_update_ext_learned(neigh, flags, &notify);

        /* New state is not a valid one: no address handling is needed. */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                /* Resolution was in progress and is now declared failed. */
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /*
         * Compare new lladdr with cached one.
         *
         * From here on "lladdr == neigh->ha" (pointer equality) is used as
         * the marker for "the address does not change".
         */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        /* Update confirmed timestamp for neighbour entry after we
         * received ARP packet even if it doesn't change IP to MAC binding.
         */
        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        /* Different address, but overriding is not allowed. */
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the cached address, demote to STALE. */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            !(flags & NEIGH_UPDATE_F_ADMIN))
                                new = old;
                }
        }

        /* Update timestamp only once we know we will make a change to the
         * neighbour entry. Otherwise we risk to move the locktime window with
         * noop updates and ignore relevant ARP updates.
         */
        if (new != old || lladdr != neigh->ha)
                neigh->updated = jiffies;

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_PROBE)
                        atomic_set(&neigh->probes, 0);
                /*
                 * Timer-driven states get a timer: reachable_time ahead for
                 * NUD_REACHABLE, expiring at the current jiffies otherwise.
                 */
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
                notify = 1;
        }

        if (lladdr != neigh->ha) {
                /* Store the new address under the ha_lock write side. */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /*
                 * The new address has not been confirmed: push ->confirmed
                 * two BASE_REACHABLE_TIME periods into the past.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        /* The entry just became valid: send out what was queued on it. */
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        /* The lock is not held while the packet is output. */
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();

                        /* Why not just use 'neigh' as-is?  The problem is that
                         * things such as shaper, eql, and sch_teql can end up
                         * using alternative, different, neigh objects to output
                         * the packet in the output path.  So what we need to do
                         * here is re-lookup the top-level neigh in the path so
                         * we can reinject the packet there.
                         */
                        n2 = NULL;
                        if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
                                n2 = dst_neigh_lookup_skb(dst, skb);
                                if (n2)
                                        n1 = n2;
                        }
                        n1->output(n1, skb);
                        /* Release n2 if the lookup returned an entry. */
                        if (n2)
                                neigh_release(n2);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                /* Drop whatever is left if the entry stopped being valid. */
                __skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        /*
         * update_isrouter is only non-zero once the address checks above
         * were passed with NEIGH_UPDATE_F_OVERRIDE_ISROUTER set.
         */
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        /* Notification is sent after neigh->lock has been released. */
        if (notify)
                neigh_update_notify(neigh, nlmsg_pid);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1303
1304 /* Update the neigh to listen temporarily for probe responses, even if it is
1305  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1306  */
1307 void __neigh_set_probe_once(struct neighbour *neigh)
1308 {
1309         if (neigh->dead)
1310                 return;
1311         neigh->updated = jiffies;
1312         if (!(neigh->nud_state & NUD_FAILED))
1313                 return;
1314         neigh->nud_state = NUD_INCOMPLETE;
1315         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1316         neigh_add_timer(neigh,
1317                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1318 }
1319 EXPORT_SYMBOL(__neigh_set_probe_once);
1320
1321 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1322                                  u8 *lladdr, void *saddr,
1323                                  struct net_device *dev)
1324 {
1325         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1326                                                  lladdr || !dev->addr_len);
1327         if (neigh)
1328                 neigh_update(neigh, lladdr, NUD_STALE,
1329                              NEIGH_UPDATE_F_OVERRIDE, 0);
1330         return neigh;
1331 }
1332 EXPORT_SYMBOL(neigh_event_ns);
1333
1334 /* called with read_lock_bh(&n->lock); */
1335 static void neigh_hh_init(struct neighbour *n)
1336 {
1337         struct net_device *dev = n->dev;
1338         __be16 prot = n->tbl->protocol;
1339         struct hh_cache *hh = &n->hh;
1340
1341         write_lock_bh(&n->lock);
1342
1343         /* Only one thread can come in here and initialize the
1344          * hh_cache entry.
1345          */
1346         if (!hh->hh_len)
1347                 dev->header_ops->cache(n, hh, prot);
1348
1349         write_unlock_bh(&n->lock);
1350 }
1351
1352 /* Slow and careful. */
1353
1354 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1355 {
1356         int rc = 0;
1357
1358         if (!neigh_event_send(neigh, skb)) {
1359                 int err;
1360                 struct net_device *dev = neigh->dev;
1361                 unsigned int seq;
1362
1363                 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1364                         neigh_hh_init(neigh);
1365
1366                 do {
1367                         __skb_pull(skb, skb_network_offset(skb));
1368                         seq = read_seqbegin(&neigh->ha_lock);
1369                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1370                                               neigh->ha, NULL, skb->len);
1371                 } while (read_seqretry(&neigh->ha_lock, seq));
1372
1373                 if (err >= 0)
1374                         rc = dev_queue_xmit(skb);
1375                 else
1376                         goto out_kfree_skb;
1377         }
1378 out:
1379         return rc;
1380 out_kfree_skb:
1381         rc = -EINVAL;
1382         kfree_skb(skb);
1383         goto out;
1384 }
1385 EXPORT_SYMBOL(neigh_resolve_output);
1386
1387 /* As fast as possible without hh cache */
1388
1389 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1390 {
1391         struct net_device *dev = neigh->dev;
1392         unsigned int seq;
1393         int err;
1394
1395         do {
1396                 __skb_pull(skb, skb_network_offset(skb));
1397                 seq = read_seqbegin(&neigh->ha_lock);
1398                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1399                                       neigh->ha, NULL, skb->len);
1400         } while (read_seqretry(&neigh->ha_lock, seq));
1401
1402         if (err >= 0)
1403                 err = dev_queue_xmit(skb);
1404         else {
1405                 err = -EINVAL;
1406                 kfree_skb(skb);
1407         }
1408         return err;
1409 }
1410 EXPORT_SYMBOL(neigh_connected_output);
1411
/*
 * Output without any neighbour processing: @neigh is not used and @skb is
 * passed straight to dev_queue_xmit(), whose result is returned.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1417
1418 static void neigh_proxy_process(struct timer_list *t)
1419 {
1420         struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1421         long sched_next = 0;
1422         unsigned long now = jiffies;
1423         struct sk_buff *skb, *n;
1424
1425         spin_lock(&tbl->proxy_queue.lock);
1426
1427         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1428                 long tdif = NEIGH_CB(skb)->sched_next - now;
1429
1430                 if (tdif <= 0) {
1431                         struct net_device *dev = skb->dev;
1432
1433                         __skb_unlink(skb, &tbl->proxy_queue);
1434                         if (tbl->proxy_redo && netif_running(dev)) {
1435                                 rcu_read_lock();
1436                                 tbl->proxy_redo(skb);
1437                                 rcu_read_unlock();
1438                         } else {
1439                                 kfree_skb(skb);
1440                         }
1441
1442                         dev_put(dev);
1443                 } else if (!sched_next || tdif < sched_next)
1444                         sched_next = tdif;
1445         }
1446         del_timer(&tbl->proxy_timer);
1447         if (sched_next)
1448                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1449         spin_unlock(&tbl->proxy_queue.lock);
1450 }
1451
1452 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1453                     struct sk_buff *skb)
1454 {
1455         unsigned long now = jiffies;
1456
1457         unsigned long sched_next = now + (prandom_u32() %
1458                                           NEIGH_VAR(p, PROXY_DELAY));
1459
1460         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1461                 kfree_skb(skb);
1462                 return;
1463         }
1464
1465         NEIGH_CB(skb)->sched_next = sched_next;
1466         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1467
1468         spin_lock(&tbl->proxy_queue.lock);
1469         if (del_timer(&tbl->proxy_timer)) {
1470                 if (time_before(tbl->proxy_timer.expires, sched_next))
1471                         sched_next = tbl->proxy_timer.expires;
1472         }
1473         skb_dst_drop(skb);
1474         dev_hold(skb->dev);
1475         __skb_queue_tail(&tbl->proxy_queue, skb);
1476         mod_timer(&tbl->proxy_timer, sched_next);
1477         spin_unlock(&tbl->proxy_queue.lock);
1478 }
1479 EXPORT_SYMBOL(pneigh_enqueue);
1480
1481 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1482                                                       struct net *net, int ifindex)
1483 {
1484         struct neigh_parms *p;
1485
1486         list_for_each_entry(p, &tbl->parms_list, list) {
1487                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1488                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1489                         return p;
1490         }
1491
1492         return NULL;
1493 }
1494
1495 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1496                                       struct neigh_table *tbl)
1497 {
1498         struct neigh_parms *p;
1499         struct net *net = dev_net(dev);
1500         const struct net_device_ops *ops = dev->netdev_ops;
1501
1502         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1503         if (p) {
1504                 p->tbl            = tbl;
1505                 refcount_set(&p->refcnt, 1);
1506                 p->reachable_time =
1507                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1508                 dev_hold(dev);
1509                 p->dev = dev;
1510                 write_pnet(&p->net, net);
1511                 p->sysctl_table = NULL;
1512
1513                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1514                         dev_put(dev);
1515                         kfree(p);
1516                         return NULL;
1517                 }
1518
1519                 write_lock_bh(&tbl->lock);
1520                 list_add(&p->list, &tbl->parms.list);
1521                 write_unlock_bh(&tbl->lock);
1522
1523                 neigh_parms_data_state_cleanall(p);
1524         }
1525         return p;
1526 }
1527 EXPORT_SYMBOL(neigh_parms_alloc);
1528
1529 static void neigh_rcu_free_parms(struct rcu_head *head)
1530 {
1531         struct neigh_parms *parms =
1532                 container_of(head, struct neigh_parms, rcu_head);
1533
1534         neigh_parms_put(parms);
1535 }
1536
1537 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1538 {
1539         if (!parms || parms == &tbl->parms)
1540                 return;
1541         write_lock_bh(&tbl->lock);
1542         list_del(&parms->list);
1543         parms->dead = 1;
1544         write_unlock_bh(&tbl->lock);
1545         if (parms->dev)
1546                 dev_put(parms->dev);
1547         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1548 }
1549 EXPORT_SYMBOL(neigh_parms_release);
1550
/* Free the memory of @parms. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        kfree(parms);
}
1555
/* Lock class passed to skb_queue_head_init_class() for every table's proxy_queue. */
static struct lock_class_key neigh_table_proxy_queue_class;

/*
 * Tables by index: a slot is filled by neigh_table_init() and reset to NULL
 * by neigh_table_clear(); neigh_find_table() reads it.
 */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1559
1560 void neigh_table_init(int index, struct neigh_table *tbl)
1561 {
1562         unsigned long now = jiffies;
1563         unsigned long phsize;
1564
1565         INIT_LIST_HEAD(&tbl->parms_list);
1566         list_add(&tbl->parms.list, &tbl->parms_list);
1567         write_pnet(&tbl->parms.net, &init_net);
1568         refcount_set(&tbl->parms.refcnt, 1);
1569         tbl->parms.reachable_time =
1570                           neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1571
1572         tbl->stats = alloc_percpu(struct neigh_statistics);
1573         if (!tbl->stats)
1574                 panic("cannot create neighbour cache statistics");
1575
1576 #ifdef CONFIG_PROC_FS
1577         if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1578                               &neigh_stat_seq_ops, tbl))
1579                 panic("cannot create neighbour proc dir entry");
1580 #endif
1581
1582         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1583
1584         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1585         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1586
1587         if (!tbl->nht || !tbl->phash_buckets)
1588                 panic("cannot allocate neighbour cache hashes");
1589
1590         if (!tbl->entry_size)
1591                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1592                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1593         else
1594                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1595
1596         rwlock_init(&tbl->lock);
1597         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1598         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1599                         tbl->parms.reachable_time);
1600         timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1601         skb_queue_head_init_class(&tbl->proxy_queue,
1602                         &neigh_table_proxy_queue_class);
1603
1604         tbl->last_flush = now;
1605         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1606
1607         neigh_tables[index] = tbl;
1608 }
1609 EXPORT_SYMBOL(neigh_table_init);
1610
1611 int neigh_table_clear(int index, struct neigh_table *tbl)
1612 {
1613         neigh_tables[index] = NULL;
1614         /* It is not clean... Fix it to unload IPv6 module safely */
1615         cancel_delayed_work_sync(&tbl->gc_work);
1616         del_timer_sync(&tbl->proxy_timer);
1617         pneigh_queue_purge(&tbl->proxy_queue);
1618         neigh_ifdown(tbl, NULL);
1619         if (atomic_read(&tbl->entries))
1620                 pr_crit("neighbour leakage\n");
1621
1622         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1623                  neigh_hash_free_rcu);
1624         tbl->nht = NULL;
1625
1626         kfree(tbl->phash_buckets);
1627         tbl->phash_buckets = NULL;
1628
1629         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1630
1631         free_percpu(tbl->stats);
1632         tbl->stats = NULL;
1633
1634         return 0;
1635 }
1636 EXPORT_SYMBOL(neigh_table_clear);
1637
1638 static struct neigh_table *neigh_find_table(int family)
1639 {
1640         struct neigh_table *tbl = NULL;
1641
1642         switch (family) {
1643         case AF_INET:
1644                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1645                 break;
1646         case AF_INET6:
1647                 tbl = neigh_tables[NEIGH_ND_TABLE];
1648                 break;
1649         case AF_DECnet:
1650                 tbl = neigh_tables[NEIGH_DN_TABLE];
1651                 break;
1652         }
1653
1654         return tbl;
1655 }
1656
1657 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1658                         struct netlink_ext_ack *extack)
1659 {
1660         struct net *net = sock_net(skb->sk);
1661         struct ndmsg *ndm;
1662         struct nlattr *dst_attr;
1663         struct neigh_table *tbl;
1664         struct neighbour *neigh;
1665         struct net_device *dev = NULL;
1666         int err = -EINVAL;
1667
1668         ASSERT_RTNL();
1669         if (nlmsg_len(nlh) < sizeof(*ndm))
1670                 goto out;
1671
1672         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1673         if (dst_attr == NULL)
1674                 goto out;
1675
1676         ndm = nlmsg_data(nlh);
1677         if (ndm->ndm_ifindex) {
1678                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1679                 if (dev == NULL) {
1680                         err = -ENODEV;
1681                         goto out;
1682                 }
1683         }
1684
1685         tbl = neigh_find_table(ndm->ndm_family);
1686         if (tbl == NULL)
1687                 return -EAFNOSUPPORT;
1688
1689         if (nla_len(dst_attr) < (int)tbl->key_len)
1690                 goto out;
1691
1692         if (ndm->ndm_flags & NTF_PROXY) {
1693                 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1694                 goto out;
1695         }
1696
1697         if (dev == NULL)
1698                 goto out;
1699
1700         neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1701         if (neigh == NULL) {
1702                 err = -ENOENT;
1703                 goto out;
1704         }
1705
1706         err = neigh_update(neigh, NULL, NUD_FAILED,
1707                            NEIGH_UPDATE_F_OVERRIDE |
1708                            NEIGH_UPDATE_F_ADMIN,
1709                            NETLINK_CB(skb).portid);
1710         write_lock_bh(&tbl->lock);
1711         neigh_release(neigh);
1712         neigh_remove_one(neigh, tbl);
1713         write_unlock_bh(&tbl->lock);
1714
1715 out:
1716         return err;
1717 }
1718
1719 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1720                      struct netlink_ext_ack *extack)
1721 {
1722         int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1723         struct net *net = sock_net(skb->sk);
1724         struct ndmsg *ndm;
1725         struct nlattr *tb[NDA_MAX+1];
1726         struct neigh_table *tbl;
1727         struct net_device *dev = NULL;
1728         struct neighbour *neigh;
1729         void *dst, *lladdr;
1730         int err;
1731
1732         ASSERT_RTNL();
1733         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1734         if (err < 0)
1735                 goto out;
1736
1737         err = -EINVAL;
1738         if (tb[NDA_DST] == NULL)
1739                 goto out;
1740
1741         ndm = nlmsg_data(nlh);
1742         if (ndm->ndm_ifindex) {
1743                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1744                 if (dev == NULL) {
1745                         err = -ENODEV;
1746                         goto out;
1747                 }
1748
1749                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1750                         goto out;
1751         }
1752
1753         tbl = neigh_find_table(ndm->ndm_family);
1754         if (tbl == NULL)
1755                 return -EAFNOSUPPORT;
1756
1757         if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1758                 goto out;
1759         dst = nla_data(tb[NDA_DST]);
1760         lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1761
1762         if (ndm->ndm_flags & NTF_PROXY) {
1763                 struct pneigh_entry *pn;
1764
1765                 err = -ENOBUFS;
1766                 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1767                 if (pn) {
1768                         pn->flags = ndm->ndm_flags;
1769                         err = 0;
1770                 }
1771                 goto out;
1772         }
1773
1774         if (dev == NULL)
1775                 goto out;
1776
1777         neigh = neigh_lookup(tbl, dst, dev);
1778         if (neigh == NULL) {
1779                 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1780                         err = -ENOENT;
1781                         goto out;
1782                 }
1783
1784                 neigh = __neigh_lookup_errno(tbl, dst, dev);
1785                 if (IS_ERR(neigh)) {
1786                         err = PTR_ERR(neigh);
1787                         goto out;
1788                 }
1789         } else {
1790                 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1791                         err = -EEXIST;
1792                         neigh_release(neigh);
1793                         goto out;
1794                 }
1795
1796                 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1797                         flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1798         }
1799
1800         if (ndm->ndm_flags & NTF_EXT_LEARNED)
1801                 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1802
1803         if (ndm->ndm_flags & NTF_USE) {
1804                 neigh_event_send(neigh, NULL);
1805                 err = 0;
1806         } else
1807                 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1808                                    NETLINK_CB(skb).portid);
1809         neigh_release(neigh);
1810
1811 out:
1812         return err;
1813 }
1814
1815 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1816 {
1817         struct nlattr *nest;
1818
1819         nest = nla_nest_start(skb, NDTA_PARMS);
1820         if (nest == NULL)
1821                 return -ENOBUFS;
1822
1823         if ((parms->dev &&
1824              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1825             nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1826             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1827                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1828             /* approximative value for deprecated QUEUE_LEN (in packets) */
1829             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1830                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1831             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1832             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1833             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1834                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1835             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1836                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1837             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1838                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1839             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1840                           NDTPA_PAD) ||
1841             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1842                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1843             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1844                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1845             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1846                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1847             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1848                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1849             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1850                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1851             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1852                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1853             nla_put_msecs(skb, NDTPA_LOCKTIME,
1854                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1855                 goto nla_put_failure;
1856         return nla_nest_end(skb, nest);
1857
1858 nla_put_failure:
1859         nla_nest_cancel(skb, nest);
1860         return -EMSGSIZE;
1861 }
1862
1863 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1864                               u32 pid, u32 seq, int type, int flags)
1865 {
1866         struct nlmsghdr *nlh;
1867         struct ndtmsg *ndtmsg;
1868
1869         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1870         if (nlh == NULL)
1871                 return -EMSGSIZE;
1872
1873         ndtmsg = nlmsg_data(nlh);
1874
1875         read_lock_bh(&tbl->lock);
1876         ndtmsg->ndtm_family = tbl->family;
1877         ndtmsg->ndtm_pad1   = 0;
1878         ndtmsg->ndtm_pad2   = 0;
1879
1880         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1881             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1882             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1883             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1884             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1885                 goto nla_put_failure;
1886         {
1887                 unsigned long now = jiffies;
1888                 long flush_delta = now - tbl->last_flush;
1889                 long rand_delta = now - tbl->last_rand;
1890                 struct neigh_hash_table *nht;
1891                 struct ndt_config ndc = {
1892                         .ndtc_key_len           = tbl->key_len,
1893                         .ndtc_entry_size        = tbl->entry_size,
1894                         .ndtc_entries           = atomic_read(&tbl->entries),
1895                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1896                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1897                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1898                 };
1899
1900                 rcu_read_lock_bh();
1901                 nht = rcu_dereference_bh(tbl->nht);
1902                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1903                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1904                 rcu_read_unlock_bh();
1905
1906                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1907                         goto nla_put_failure;
1908         }
1909
1910         {
1911                 int cpu;
1912                 struct ndt_stats ndst;
1913
1914                 memset(&ndst, 0, sizeof(ndst));
1915
1916                 for_each_possible_cpu(cpu) {
1917                         struct neigh_statistics *st;
1918
1919                         st = per_cpu_ptr(tbl->stats, cpu);
1920                         ndst.ndts_allocs                += st->allocs;
1921                         ndst.ndts_destroys              += st->destroys;
1922                         ndst.ndts_hash_grows            += st->hash_grows;
1923                         ndst.ndts_res_failed            += st->res_failed;
1924                         ndst.ndts_lookups               += st->lookups;
1925                         ndst.ndts_hits                  += st->hits;
1926                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1927                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1928                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1929                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1930                         ndst.ndts_table_fulls           += st->table_fulls;
1931                 }
1932
1933                 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1934                                   NDTA_PAD))
1935                         goto nla_put_failure;
1936         }
1937
1938         BUG_ON(tbl->parms.dev);
1939         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1940                 goto nla_put_failure;
1941
1942         read_unlock_bh(&tbl->lock);
1943         nlmsg_end(skb, nlh);
1944         return 0;
1945
1946 nla_put_failure:
1947         read_unlock_bh(&tbl->lock);
1948         nlmsg_cancel(skb, nlh);
1949         return -EMSGSIZE;
1950 }
1951
1952 static int neightbl_fill_param_info(struct sk_buff *skb,
1953                                     struct neigh_table *tbl,
1954                                     struct neigh_parms *parms,
1955                                     u32 pid, u32 seq, int type,
1956                                     unsigned int flags)
1957 {
1958         struct ndtmsg *ndtmsg;
1959         struct nlmsghdr *nlh;
1960
1961         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1962         if (nlh == NULL)
1963                 return -EMSGSIZE;
1964
1965         ndtmsg = nlmsg_data(nlh);
1966
1967         read_lock_bh(&tbl->lock);
1968         ndtmsg->ndtm_family = tbl->family;
1969         ndtmsg->ndtm_pad1   = 0;
1970         ndtmsg->ndtm_pad2   = 0;
1971
1972         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1973             neightbl_fill_parms(skb, parms) < 0)
1974                 goto errout;
1975
1976         read_unlock_bh(&tbl->lock);
1977         nlmsg_end(skb, nlh);
1978         return 0;
1979 errout:
1980         read_unlock_bh(&tbl->lock);
1981         nlmsg_cancel(skb, nlh);
1982         return -EMSGSIZE;
1983 }
1984
1985 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1986         [NDTA_NAME]             = { .type = NLA_STRING },
1987         [NDTA_THRESH1]          = { .type = NLA_U32 },
1988         [NDTA_THRESH2]          = { .type = NLA_U32 },
1989         [NDTA_THRESH3]          = { .type = NLA_U32 },
1990         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1991         [NDTA_PARMS]            = { .type = NLA_NESTED },
1992 };
1993
1994 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1995         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1996         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1997         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1998         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1999         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
2000         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
2001         [NDTPA_MCAST_REPROBES]          = { .type = NLA_U32 },
2002         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
2003         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
2004         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
2005         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
2006         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
2007         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
2008         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
2009 };
2010
2011 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2012                         struct netlink_ext_ack *extack)
2013 {
2014         struct net *net = sock_net(skb->sk);
2015         struct neigh_table *tbl;
2016         struct ndtmsg *ndtmsg;
2017         struct nlattr *tb[NDTA_MAX+1];
2018         bool found = false;
2019         int err, tidx;
2020
2021         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2022                           nl_neightbl_policy, extack);
2023         if (err < 0)
2024                 goto errout;
2025
2026         if (tb[NDTA_NAME] == NULL) {
2027                 err = -EINVAL;
2028                 goto errout;
2029         }
2030
2031         ndtmsg = nlmsg_data(nlh);
2032
2033         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2034                 tbl = neigh_tables[tidx];
2035                 if (!tbl)
2036                         continue;
2037                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2038                         continue;
2039                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2040                         found = true;
2041                         break;
2042                 }
2043         }
2044
2045         if (!found)
2046                 return -ENOENT;
2047
2048         /*
2049          * We acquire tbl->lock to be nice to the periodic timers and
2050          * make sure they always see a consistent set of values.
2051          */
2052         write_lock_bh(&tbl->lock);
2053
2054         if (tb[NDTA_PARMS]) {
2055                 struct nlattr *tbp[NDTPA_MAX+1];
2056                 struct neigh_parms *p;
2057                 int i, ifindex = 0;
2058
2059                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2060                                        nl_ntbl_parm_policy, extack);
2061                 if (err < 0)
2062                         goto errout_tbl_lock;
2063
2064                 if (tbp[NDTPA_IFINDEX])
2065                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2066
2067                 p = lookup_neigh_parms(tbl, net, ifindex);
2068                 if (p == NULL) {
2069                         err = -ENOENT;
2070                         goto errout_tbl_lock;
2071                 }
2072
2073                 for (i = 1; i <= NDTPA_MAX; i++) {
2074                         if (tbp[i] == NULL)
2075                                 continue;
2076
2077                         switch (i) {
2078                         case NDTPA_QUEUE_LEN:
2079                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2080                                               nla_get_u32(tbp[i]) *
2081                                               SKB_TRUESIZE(ETH_FRAME_LEN));
2082                                 break;
2083                         case NDTPA_QUEUE_LENBYTES:
2084                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2085                                               nla_get_u32(tbp[i]));
2086                                 break;
2087                         case NDTPA_PROXY_QLEN:
2088                                 NEIGH_VAR_SET(p, PROXY_QLEN,
2089                                               nla_get_u32(tbp[i]));
2090                                 break;
2091                         case NDTPA_APP_PROBES:
2092                                 NEIGH_VAR_SET(p, APP_PROBES,
2093                                               nla_get_u32(tbp[i]));
2094                                 break;
2095                         case NDTPA_UCAST_PROBES:
2096                                 NEIGH_VAR_SET(p, UCAST_PROBES,
2097                                               nla_get_u32(tbp[i]));
2098                                 break;
2099                         case NDTPA_MCAST_PROBES:
2100                                 NEIGH_VAR_SET(p, MCAST_PROBES,
2101                                               nla_get_u32(tbp[i]));
2102                                 break;
2103                         case NDTPA_MCAST_REPROBES:
2104                                 NEIGH_VAR_SET(p, MCAST_REPROBES,
2105                                               nla_get_u32(tbp[i]));
2106                                 break;
2107                         case NDTPA_BASE_REACHABLE_TIME:
2108                                 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2109                                               nla_get_msecs(tbp[i]));
2110                                 /* update reachable_time as well, otherwise, the change will
2111                                  * only be effective after the next time neigh_periodic_work
2112                                  * decides to recompute it (can be multiple minutes)
2113                                  */
2114                                 p->reachable_time =
2115                                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2116                                 break;
2117                         case NDTPA_GC_STALETIME:
2118                                 NEIGH_VAR_SET(p, GC_STALETIME,
2119                                               nla_get_msecs(tbp[i]));
2120                                 break;
2121                         case NDTPA_DELAY_PROBE_TIME:
2122                                 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2123                                               nla_get_msecs(tbp[i]));
2124                                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2125                                 break;
2126                         case NDTPA_RETRANS_TIME:
2127                                 NEIGH_VAR_SET(p, RETRANS_TIME,
2128                                               nla_get_msecs(tbp[i]));
2129                                 break;
2130                         case NDTPA_ANYCAST_DELAY:
2131                                 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2132                                               nla_get_msecs(tbp[i]));
2133                                 break;
2134                         case NDTPA_PROXY_DELAY:
2135                                 NEIGH_VAR_SET(p, PROXY_DELAY,
2136                                               nla_get_msecs(tbp[i]));
2137                                 break;
2138                         case NDTPA_LOCKTIME:
2139                                 NEIGH_VAR_SET(p, LOCKTIME,
2140                                               nla_get_msecs(tbp[i]));
2141                                 break;
2142                         }
2143                 }
2144         }
2145
2146         err = -ENOENT;
2147         if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2148              tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2149             !net_eq(net, &init_net))
2150                 goto errout_tbl_lock;
2151
2152         if (tb[NDTA_THRESH1])
2153                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2154
2155         if (tb[NDTA_THRESH2])
2156                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2157
2158         if (tb[NDTA_THRESH3])
2159                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2160
2161         if (tb[NDTA_GC_INTERVAL])
2162                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2163
2164         err = 0;
2165
2166 errout_tbl_lock:
2167         write_unlock_bh(&tbl->lock);
2168 errout:
2169         return err;
2170 }
2171
2172 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2173 {
2174         struct net *net = sock_net(skb->sk);
2175         int family, tidx, nidx = 0;
2176         int tbl_skip = cb->args[0];
2177         int neigh_skip = cb->args[1];
2178         struct neigh_table *tbl;
2179
2180         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2181
2182         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2183                 struct neigh_parms *p;
2184
2185                 tbl = neigh_tables[tidx];
2186                 if (!tbl)
2187                         continue;
2188
2189                 if (tidx < tbl_skip || (family && tbl->family != family))
2190                         continue;
2191
2192                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2193                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2194                                        NLM_F_MULTI) < 0)
2195                         break;
2196
2197                 nidx = 0;
2198                 p = list_next_entry(&tbl->parms, list);
2199                 list_for_each_entry_from(p, &tbl->parms_list, list) {
2200                         if (!net_eq(neigh_parms_net(p), net))
2201                                 continue;
2202
2203                         if (nidx < neigh_skip)
2204                                 goto next;
2205
2206                         if (neightbl_fill_param_info(skb, tbl, p,
2207                                                      NETLINK_CB(cb->skb).portid,
2208                                                      cb->nlh->nlmsg_seq,
2209                                                      RTM_NEWNEIGHTBL,
2210                                                      NLM_F_MULTI) < 0)
2211                                 goto out;
2212                 next:
2213                         nidx++;
2214                 }
2215
2216                 neigh_skip = 0;
2217         }
2218 out:
2219         cb->args[0] = tidx;
2220         cb->args[1] = nidx;
2221
2222         return skb->len;
2223 }
2224
2225 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2226                            u32 pid, u32 seq, int type, unsigned int flags)
2227 {
2228         unsigned long now = jiffies;
2229         struct nda_cacheinfo ci;
2230         struct nlmsghdr *nlh;
2231         struct ndmsg *ndm;
2232
2233         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2234         if (nlh == NULL)
2235                 return -EMSGSIZE;
2236
2237         ndm = nlmsg_data(nlh);
2238         ndm->ndm_family  = neigh->ops->family;
2239         ndm->ndm_pad1    = 0;
2240         ndm->ndm_pad2    = 0;
2241         ndm->ndm_flags   = neigh->flags;
2242         ndm->ndm_type    = neigh->type;
2243         ndm->ndm_ifindex = neigh->dev->ifindex;
2244
2245         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2246                 goto nla_put_failure;
2247
2248         read_lock_bh(&neigh->lock);
2249         ndm->ndm_state   = neigh->nud_state;
2250         if (neigh->nud_state & NUD_VALID) {
2251                 char haddr[MAX_ADDR_LEN];
2252
2253                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2254                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2255                         read_unlock_bh(&neigh->lock);
2256                         goto nla_put_failure;
2257                 }
2258         }
2259
2260         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2261         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2262         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2263         ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
2264         read_unlock_bh(&neigh->lock);
2265
2266         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2267             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2268                 goto nla_put_failure;
2269
2270         nlmsg_end(skb, nlh);
2271         return 0;
2272
2273 nla_put_failure:
2274         nlmsg_cancel(skb, nlh);
2275         return -EMSGSIZE;
2276 }
2277
2278 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2279                             u32 pid, u32 seq, int type, unsigned int flags,
2280                             struct neigh_table *tbl)
2281 {
2282         struct nlmsghdr *nlh;
2283         struct ndmsg *ndm;
2284
2285         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2286         if (nlh == NULL)
2287                 return -EMSGSIZE;
2288
2289         ndm = nlmsg_data(nlh);
2290         ndm->ndm_family  = tbl->family;
2291         ndm->ndm_pad1    = 0;
2292         ndm->ndm_pad2    = 0;
2293         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2294         ndm->ndm_type    = RTN_UNICAST;
2295         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2296         ndm->ndm_state   = NUD_NONE;
2297
2298         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2299                 goto nla_put_failure;
2300
2301         nlmsg_end(skb, nlh);
2302         return 0;
2303
2304 nla_put_failure:
2305         nlmsg_cancel(skb, nlh);
2306         return -EMSGSIZE;
2307 }
2308
2309 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2310 {
2311         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2312         __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2313 }
2314
2315 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2316 {
2317         struct net_device *master;
2318
2319         if (!master_idx)
2320                 return false;
2321
2322         master = netdev_master_upper_dev_get(dev);
2323         if (!master || master->ifindex != master_idx)
2324                 return true;
2325
2326         return false;
2327 }
2328
2329 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2330 {
2331         if (filter_idx && dev->ifindex != filter_idx)
2332                 return true;
2333
2334         return false;
2335 }
2336
2337 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2338                             struct netlink_callback *cb)
2339 {
2340         struct net *net = sock_net(skb->sk);
2341         const struct nlmsghdr *nlh = cb->nlh;
2342         struct nlattr *tb[NDA_MAX + 1];
2343         struct neighbour *n;
2344         int rc, h, s_h = cb->args[1];
2345         int idx, s_idx = idx = cb->args[2];
2346         struct neigh_hash_table *nht;
2347         int filter_master_idx = 0, filter_idx = 0;
2348         unsigned int flags = NLM_F_MULTI;
2349         int err;
2350
2351         err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
2352         if (!err) {
2353                 if (tb[NDA_IFINDEX]) {
2354                         if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
2355                                 return -EINVAL;
2356                         filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
2357                 }
2358                 if (tb[NDA_MASTER]) {
2359                         if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
2360                                 return -EINVAL;
2361                         filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
2362                 }
2363                 if (filter_idx || filter_master_idx)
2364                         flags |= NLM_F_DUMP_FILTERED;
2365         }
2366
2367         rcu_read_lock_bh();
2368         nht = rcu_dereference_bh(tbl->nht);
2369
2370         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2371                 if (h > s_h)
2372                         s_idx = 0;
2373                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2374                      n != NULL;
2375                      n = rcu_dereference_bh(n->next)) {
2376                         if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2377                                 goto next;
2378                         if (neigh_ifindex_filtered(n->dev, filter_idx) ||
2379                             neigh_master_filtered(n->dev, filter_master_idx))
2380                                 goto next;
2381                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2382                                             cb->nlh->nlmsg_seq,
2383                                             RTM_NEWNEIGH,
2384                                             flags) < 0) {
2385                                 rc = -1;
2386                                 goto out;
2387                         }
2388 next:
2389                         idx++;
2390                 }
2391         }
2392         rc = skb->len;
2393 out:
2394         rcu_read_unlock_bh();
2395         cb->args[1] = h;
2396         cb->args[2] = idx;
2397         return rc;
2398 }
2399
2400 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2401                              struct netlink_callback *cb)
2402 {
2403         struct pneigh_entry *n;
2404         struct net *net = sock_net(skb->sk);
2405         int rc, h, s_h = cb->args[3];
2406         int idx, s_idx = idx = cb->args[4];
2407
2408         read_lock_bh(&tbl->lock);
2409
2410         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2411                 if (h > s_h)
2412                         s_idx = 0;
2413                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2414                         if (idx < s_idx || pneigh_net(n) != net)
2415                                 goto next;
2416                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2417                                             cb->nlh->nlmsg_seq,
2418                                             RTM_NEWNEIGH,
2419                                             NLM_F_MULTI, tbl) < 0) {
2420                                 read_unlock_bh(&tbl->lock);
2421                                 rc = -1;
2422                                 goto out;
2423                         }
2424                 next:
2425                         idx++;
2426                 }
2427         }
2428
2429         read_unlock_bh(&tbl->lock);
2430         rc = skb->len;
2431 out:
2432         cb->args[3] = h;
2433         cb->args[4] = idx;
2434         return rc;
2435
2436 }
2437
2438 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2439 {
2440         struct neigh_table *tbl;
2441         int t, family, s_t;
2442         int proxy = 0;
2443         int err;
2444
2445         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2446
2447         /* check for full ndmsg structure presence, family member is
2448          * the same for both structures
2449          */
2450         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2451             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2452                 proxy = 1;
2453
2454         s_t = cb->args[0];
2455
2456         for (t = 0; t < NEIGH_NR_TABLES; t++) {
2457                 tbl = neigh_tables[t];
2458
2459                 if (!tbl)
2460                         continue;
2461                 if (t < s_t || (family && tbl->family != family))
2462                         continue;
2463                 if (t > s_t)
2464                         memset(&cb->args[1], 0, sizeof(cb->args) -
2465                                                 sizeof(cb->args[0]));
2466                 if (proxy)
2467                         err = pneigh_dump_table(tbl, skb, cb);
2468                 else
2469                         err = neigh_dump_table(tbl, skb, cb);
2470                 if (err < 0)
2471                         break;
2472         }
2473
2474         cb->args[0] = t;
2475         return skb->len;
2476 }
2477
2478 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2479 {
2480         int chain;
2481         struct neigh_hash_table *nht;
2482
2483         rcu_read_lock_bh();
2484         nht = rcu_dereference_bh(tbl->nht);
2485
2486         read_lock(&tbl->lock); /* avoid resizes */
2487         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2488                 struct neighbour *n;
2489
2490                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2491                      n != NULL;
2492                      n = rcu_dereference_bh(n->next))
2493                         cb(n, cookie);
2494         }
2495         read_unlock(&tbl->lock);
2496         rcu_read_unlock_bh();
2497 }
2498 EXPORT_SYMBOL(neigh_for_each);
2499
2500 /* The tbl->lock must be held as a writer and BH disabled. */
2501 void __neigh_for_each_release(struct neigh_table *tbl,
2502                               int (*cb)(struct neighbour *))
2503 {
2504         int chain;
2505         struct neigh_hash_table *nht;
2506
2507         nht = rcu_dereference_protected(tbl->nht,
2508                                         lockdep_is_held(&tbl->lock));
2509         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2510                 struct neighbour *n;
2511                 struct neighbour __rcu **np;
2512
2513                 np = &nht->hash_buckets[chain];
2514                 while ((n = rcu_dereference_protected(*np,
2515                                         lockdep_is_held(&tbl->lock))) != NULL) {
2516                         int release;
2517
2518                         write_lock(&n->lock);
2519                         release = cb(n);
2520                         if (release) {
2521                                 rcu_assign_pointer(*np,
2522                                         rcu_dereference_protected(n->next,
2523                                                 lockdep_is_held(&tbl->lock)));
2524                                 n->dead = 1;
2525                         } else
2526                                 np = &n->next;
2527                         write_unlock(&n->lock);
2528                         if (release)
2529                                 neigh_cleanup_and_release(n);
2530                 }
2531         }
2532 }
2533 EXPORT_SYMBOL(__neigh_for_each_release);
2534
2535 int neigh_xmit(int index, struct net_device *dev,
2536                const void *addr, struct sk_buff *skb)
2537 {
2538         int err = -EAFNOSUPPORT;
2539         if (likely(index < NEIGH_NR_TABLES)) {
2540                 struct neigh_table *tbl;
2541                 struct neighbour *neigh;
2542
2543                 tbl = neigh_tables[index];
2544                 if (!tbl)
2545                         goto out;
2546                 rcu_read_lock_bh();
2547                 if (index == NEIGH_ARP_TABLE) {
2548                         u32 key = *((u32 *)addr);
2549
2550                         neigh = __ipv4_neigh_lookup_noref(dev, key);
2551                 } else {
2552                         neigh = __neigh_lookup_noref(tbl, addr, dev);
2553                 }
2554                 if (!neigh)
2555                         neigh = __neigh_create(tbl, addr, dev, false);
2556                 err = PTR_ERR(neigh);
2557                 if (IS_ERR(neigh)) {
2558                         rcu_read_unlock_bh();
2559                         goto out_kfree_skb;
2560                 }
2561                 err = neigh->output(neigh, skb);
2562                 rcu_read_unlock_bh();
2563         }
2564         else if (index == NEIGH_LINK_TABLE) {
2565                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2566                                       addr, NULL, skb->len);
2567                 if (err < 0)
2568                         goto out_kfree_skb;
2569                 err = dev_queue_xmit(skb);
2570         }
2571 out:
2572         return err;
2573 out_kfree_skb:
2574         kfree_skb(skb);
2575         goto out;
2576 }
2577 EXPORT_SYMBOL(neigh_xmit);
2578
2579 #ifdef CONFIG_PROC_FS
2580
2581 static struct neighbour *neigh_get_first(struct seq_file *seq)
2582 {
2583         struct neigh_seq_state *state = seq->private;
2584         struct net *net = seq_file_net(seq);
2585         struct neigh_hash_table *nht = state->nht;
2586         struct neighbour *n = NULL;
2587         int bucket = state->bucket;
2588
2589         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2590         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2591                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2592
2593                 while (n) {
2594                         if (!net_eq(dev_net(n->dev), net))
2595                                 goto next;
2596                         if (state->neigh_sub_iter) {
2597                                 loff_t fakep = 0;
2598                                 void *v;
2599
2600                                 v = state->neigh_sub_iter(state, n, &fakep);
2601                                 if (!v)
2602                                         goto next;
2603                         }
2604                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2605                                 break;
2606                         if (n->nud_state & ~NUD_NOARP)
2607                                 break;
2608 next:
2609                         n = rcu_dereference_bh(n->next);
2610                 }
2611
2612                 if (n)
2613                         break;
2614         }
2615         state->bucket = bucket;
2616
2617         return n;
2618 }
2619
2620 static struct neighbour *neigh_get_next(struct seq_file *seq,
2621                                         struct neighbour *n,
2622                                         loff_t *pos)
2623 {
2624         struct neigh_seq_state *state = seq->private;
2625         struct net *net = seq_file_net(seq);
2626         struct neigh_hash_table *nht = state->nht;
2627
2628         if (state->neigh_sub_iter) {
2629                 void *v = state->neigh_sub_iter(state, n, pos);
2630                 if (v)
2631                         return n;
2632         }
2633         n = rcu_dereference_bh(n->next);
2634
2635         while (1) {
2636                 while (n) {
2637                         if (!net_eq(dev_net(n->dev), net))
2638                                 goto next;
2639                         if (state->neigh_sub_iter) {
2640                                 void *v = state->neigh_sub_iter(state, n, pos);
2641                                 if (v)
2642                                         return n;
2643                                 goto next;
2644                         }
2645                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2646                                 break;
2647
2648                         if (n->nud_state & ~NUD_NOARP)
2649                                 break;
2650 next:
2651                         n = rcu_dereference_bh(n->next);
2652                 }
2653
2654                 if (n)
2655                         break;
2656
2657                 if (++state->bucket >= (1 << nht->hash_shift))
2658                         break;
2659
2660                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2661         }
2662
2663         if (n && pos)
2664                 --(*pos);
2665         return n;
2666 }
2667
2668 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2669 {
2670         struct neighbour *n = neigh_get_first(seq);
2671
2672         if (n) {
2673                 --(*pos);
2674                 while (*pos) {
2675                         n = neigh_get_next(seq, n, pos);
2676                         if (!n)
2677                                 break;
2678                 }
2679         }
2680         return *pos ? NULL : n;
2681 }
2682
2683 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2684 {
2685         struct neigh_seq_state *state = seq->private;
2686         struct net *net = seq_file_net(seq);
2687         struct neigh_table *tbl = state->tbl;
2688         struct pneigh_entry *pn = NULL;
2689         int bucket = state->bucket;
2690
2691         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2692         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2693                 pn = tbl->phash_buckets[bucket];
2694                 while (pn && !net_eq(pneigh_net(pn), net))
2695                         pn = pn->next;
2696                 if (pn)
2697                         break;
2698         }
2699         state->bucket = bucket;
2700
2701         return pn;
2702 }
2703
2704 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2705                                             struct pneigh_entry *pn,
2706                                             loff_t *pos)
2707 {
2708         struct neigh_seq_state *state = seq->private;
2709         struct net *net = seq_file_net(seq);
2710         struct neigh_table *tbl = state->tbl;
2711
2712         do {
2713                 pn = pn->next;
2714         } while (pn && !net_eq(pneigh_net(pn), net));
2715
2716         while (!pn) {
2717                 if (++state->bucket > PNEIGH_HASHMASK)
2718                         break;
2719                 pn = tbl->phash_buckets[state->bucket];
2720                 while (pn && !net_eq(pneigh_net(pn), net))
2721                         pn = pn->next;
2722                 if (pn)
2723                         break;
2724         }
2725
2726         if (pn && pos)
2727                 --(*pos);
2728
2729         return pn;
2730 }
2731
2732 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2733 {
2734         struct pneigh_entry *pn = pneigh_get_first(seq);
2735
2736         if (pn) {
2737                 --(*pos);
2738                 while (*pos) {
2739                         pn = pneigh_get_next(seq, pn, pos);
2740                         if (!pn)
2741                                 break;
2742                 }
2743         }
2744         return *pos ? NULL : pn;
2745 }
2746
2747 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2748 {
2749         struct neigh_seq_state *state = seq->private;
2750         void *rc;
2751         loff_t idxpos = *pos;
2752
2753         rc = neigh_get_idx(seq, &idxpos);
2754         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2755                 rc = pneigh_get_idx(seq, &idxpos);
2756
2757         return rc;
2758 }
2759
2760 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2761         __acquires(tbl->lock)
2762         __acquires(rcu_bh)
2763 {
2764         struct neigh_seq_state *state = seq->private;
2765
2766         state->tbl = tbl;
2767         state->bucket = 0;
2768         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2769
2770         rcu_read_lock_bh();
2771         state->nht = rcu_dereference_bh(tbl->nht);
2772         read_lock(&tbl->lock);
2773
2774         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2775 }
2776 EXPORT_SYMBOL(neigh_seq_start);
2777
2778 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2779 {
2780         struct neigh_seq_state *state;
2781         void *rc;
2782
2783         if (v == SEQ_START_TOKEN) {
2784                 rc = neigh_get_first(seq);
2785                 goto out;
2786         }
2787
2788         state = seq->private;
2789         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2790                 rc = neigh_get_next(seq, v, NULL);
2791                 if (rc)
2792                         goto out;
2793                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2794                         rc = pneigh_get_first(seq);
2795         } else {
2796                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2797                 rc = pneigh_get_next(seq, v, NULL);
2798         }
2799 out:
2800         ++(*pos);
2801         return rc;
2802 }
2803 EXPORT_SYMBOL(neigh_seq_next);
2804
2805 void neigh_seq_stop(struct seq_file *seq, void *v)
2806         __releases(tbl->lock)
2807         __releases(rcu_bh)
2808 {
2809         struct neigh_seq_state *state = seq->private;
2810         struct neigh_table *tbl = state->tbl;
2811
2812         read_unlock(&tbl->lock);
2813         rcu_read_unlock_bh();
2814 }
2815 EXPORT_SYMBOL(neigh_seq_stop);
2816
2817 /* statistics via seq_file */
2818
2819 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2820 {
2821         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2822         int cpu;
2823
2824         if (*pos == 0)
2825                 return SEQ_START_TOKEN;
2826
2827         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2828                 if (!cpu_possible(cpu))
2829                         continue;
2830                 *pos = cpu+1;
2831                 return per_cpu_ptr(tbl->stats, cpu);
2832         }
2833         return NULL;
2834 }
2835
2836 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2837 {
2838         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2839         int cpu;
2840
2841         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2842                 if (!cpu_possible(cpu))
2843                         continue;
2844                 *pos = cpu+1;
2845                 return per_cpu_ptr(tbl->stats, cpu);
2846         }
2847         (*pos)++;
2848         return NULL;
2849 }
2850
/* ->stop for the statistics file: intentionally empty, the start/next
 * callbacks above take no lock that would need releasing here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2855
2856 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2857 {
2858         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2859         struct neigh_statistics *st = v;
2860
2861         if (v == SEQ_START_TOKEN) {
2862                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2863                 return 0;
2864         }
2865
2866         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2867                         "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2868                    atomic_read(&tbl->entries),
2869
2870                    st->allocs,
2871                    st->destroys,
2872                    st->hash_grows,
2873
2874                    st->lookups,
2875                    st->hits,
2876
2877                    st->res_failed,
2878
2879                    st->rcv_probes_mcast,
2880                    st->rcv_probes_ucast,
2881
2882                    st->periodic_gc_runs,
2883                    st->forced_gc_runs,
2884                    st->unres_discards,
2885                    st->table_fulls
2886                    );
2887
2888         return 0;
2889 }
2890
2891 static const struct seq_operations neigh_stat_seq_ops = {
2892         .start  = neigh_stat_seq_start,
2893         .next   = neigh_stat_seq_next,
2894         .stop   = neigh_stat_seq_stop,
2895         .show   = neigh_stat_seq_show,
2896 };
2897 #endif /* CONFIG_PROC_FS */
2898
2899 static inline size_t neigh_nlmsg_size(void)
2900 {
2901         return NLMSG_ALIGN(sizeof(struct ndmsg))
2902                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2903                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2904                + nla_total_size(sizeof(struct nda_cacheinfo))
2905                + nla_total_size(4); /* NDA_PROBES */
2906 }
2907
2908 static void __neigh_notify(struct neighbour *n, int type, int flags,
2909                            u32 pid)
2910 {
2911         struct net *net = dev_net(n->dev);
2912         struct sk_buff *skb;
2913         int err = -ENOBUFS;
2914
2915         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2916         if (skb == NULL)
2917                 goto errout;
2918
2919         err = neigh_fill_info(skb, n, pid, 0, type, flags);
2920         if (err < 0) {
2921                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2922                 WARN_ON(err == -EMSGSIZE);
2923                 kfree_skb(skb);
2924                 goto errout;
2925         }
2926         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2927         return;
2928 errout:
2929         if (err < 0)
2930                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2931 }
2932
2933 void neigh_app_ns(struct neighbour *n)
2934 {
2935         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
2936 }
2937 EXPORT_SYMBOL(neigh_app_ns);
2938
2939 #ifdef CONFIG_SYSCTL
2940 static int zero;
2941 static int int_max = INT_MAX;
2942 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2943
2944 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2945                            void __user *buffer, size_t *lenp, loff_t *ppos)
2946 {
2947         int size, ret;
2948         struct ctl_table tmp = *ctl;
2949
2950         tmp.extra1 = &zero;
2951         tmp.extra2 = &unres_qlen_max;
2952         tmp.data = &size;
2953
2954         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2955         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2956
2957         if (write && !ret)
2958                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2959         return ret;
2960 }
2961
2962 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2963                                                    int family)
2964 {
2965         switch (family) {
2966         case AF_INET:
2967                 return __in_dev_arp_parms_get_rcu(dev);
2968         case AF_INET6:
2969                 return __in6_dev_nd_parms_get_rcu(dev);
2970         }
2971         return NULL;
2972 }
2973
2974 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2975                                   int index)
2976 {
2977         struct net_device *dev;
2978         int family = neigh_parms_family(p);
2979
2980         rcu_read_lock();
2981         for_each_netdev_rcu(net, dev) {
2982                 struct neigh_parms *dst_p =
2983                                 neigh_get_dev_parms_rcu(dev, family);
2984
2985                 if (dst_p && !test_bit(index, dst_p->data_state))
2986                         dst_p->data[index] = p->data[index];
2987         }
2988         rcu_read_unlock();
2989 }
2990
2991 static void neigh_proc_update(struct ctl_table *ctl, int write)
2992 {
2993         struct net_device *dev = ctl->extra1;
2994         struct neigh_parms *p = ctl->extra2;
2995         struct net *net = neigh_parms_net(p);
2996         int index = (int *) ctl->data - p->data;
2997
2998         if (!write)
2999                 return;
3000
3001         set_bit(index, p->data_state);
3002         if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3003                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3004         if (!dev) /* NULL dev means this is default value */
3005                 neigh_copy_dflt_parms(net, p, index);
3006 }
3007
3008 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3009                                            void __user *buffer,
3010                                            size_t *lenp, loff_t *ppos)
3011 {
3012         struct ctl_table tmp = *ctl;
3013         int ret;
3014
3015         tmp.extra1 = &zero;
3016         tmp.extra2 = &int_max;
3017
3018         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3019         neigh_proc_update(ctl, write);
3020         return ret;
3021 }
3022
3023 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3024                         void __user *buffer, size_t *lenp, loff_t *ppos)
3025 {
3026         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3027
3028         neigh_proc_update(ctl, write);
3029         return ret;
3030 }
3031 EXPORT_SYMBOL(neigh_proc_dointvec);
3032
3033 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3034                                 void __user *buffer,
3035                                 size_t *lenp, loff_t *ppos)
3036 {
3037         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3038
3039         neigh_proc_update(ctl, write);
3040         return ret;
3041 }
3042 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3043
3044 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3045                                               void __user *buffer,
3046                                               size_t *lenp, loff_t *ppos)
3047 {
3048         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3049
3050         neigh_proc_update(ctl, write);
3051         return ret;
3052 }
3053
3054 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3055                                    void __user *buffer,
3056                                    size_t *lenp, loff_t *ppos)
3057 {
3058         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3059
3060         neigh_proc_update(ctl, write);
3061         return ret;
3062 }
3063 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3064
3065 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3066                                           void __user *buffer,
3067                                           size_t *lenp, loff_t *ppos)
3068 {
3069         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3070
3071         neigh_proc_update(ctl, write);
3072         return ret;
3073 }
3074
3075 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3076                                           void __user *buffer,
3077                                           size_t *lenp, loff_t *ppos)
3078 {
3079         struct neigh_parms *p = ctl->extra2;
3080         int ret;
3081
3082         if (strcmp(ctl->procname, "base_reachable_time") == 0)
3083                 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3084         else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3085                 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3086         else
3087                 ret = -1;
3088
3089         if (write && ret == 0) {
3090                 /* update reachable_time as well, otherwise, the change will
3091                  * only be effective after the next time neigh_periodic_work
3092                  * decides to recompute it
3093                  */
3094                 p->reachable_time =
3095                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3096         }
3097         return ret;
3098 }
3099
/* Address of data[idx] within a struct neigh_parms placed at address 0,
 * i.e. the slot's offset expressed as a pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(idx)    \
        (&((struct neigh_parms *) 0)->data[idx])

/* Template entry for slot NEIGH_VAR_<var>: an int-sized knob called
 * pname with permissions perm and handler fn, whose .data refers to the
 * storage slot NEIGH_VAR_<store> (as an offset, see above).
 */
#define NEIGH_SYSCTL_ENTRY(var, store, pname, perm, fn) \
        [NEIGH_VAR_ ## var] = { \
                .procname       = pname, \
                .data           = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## store), \
                .maxlen         = sizeof(int), \
                .mode           = perm, \
                .proc_handler   = fn, \
        }

/* Convenience wrappers: mode 0644, one per handler flavour. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(var, pname) \
        NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(var, pname) \
        NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(var, pname) \
        NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(var, pname) \
        NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_ms_jiffies)

/* "REUSED" variants: the entry occupies slot <var> but stores into <store>. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(var, store, pname) \
        NEIGH_SYSCTL_ENTRY(var, store, pname, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(var, store, pname) \
        NEIGH_SYSCTL_ENTRY(var, store, pname, 0644, neigh_proc_dointvec_unres_qlen)
3129
3130 static struct neigh_sysctl_table {
3131         struct ctl_table_header *sysctl_header;
3132         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3133 } neigh_sysctl_template __read_mostly = {
3134         .neigh_vars = {
3135                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3136                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3137                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3138                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3139                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3140                 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3141                 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3142                 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3143                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3144                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3145                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3146                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3147                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3148                 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3149                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3150                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3151                 [NEIGH_VAR_GC_INTERVAL] = {
3152                         .procname       = "gc_interval",
3153                         .maxlen         = sizeof(int),
3154                         .mode           = 0644,
3155                         .proc_handler   = proc_dointvec_jiffies,
3156                 },
3157                 [NEIGH_VAR_GC_THRESH1] = {
3158                         .procname       = "gc_thresh1",
3159                         .maxlen         = sizeof(int),
3160                         .mode           = 0644,
3161                         .extra1         = &zero,
3162                         .extra2         = &int_max,
3163                         .proc_handler   = proc_dointvec_minmax,
3164                 },
3165                 [NEIGH_VAR_GC_THRESH2] = {
3166                         .procname       = "gc_thresh2",
3167                         .maxlen         = sizeof(int),
3168                         .mode           = 0644,
3169                         .extra1         = &zero,
3170                         .extra2         = &int_max,
3171                         .proc_handler   = proc_dointvec_minmax,
3172                 },
3173                 [NEIGH_VAR_GC_THRESH3] = {
3174                         .procname       = "gc_thresh3",
3175                         .maxlen         = sizeof(int),
3176                         .mode           = 0644,
3177                         .extra1         = &zero,
3178                         .extra2         = &int_max,
3179                         .proc_handler   = proc_dointvec_minmax,
3180                 },
3181                 {},
3182         },
3183 };
3184
3185 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3186                           proc_handler *handler)
3187 {
3188         int i;
3189         struct neigh_sysctl_table *t;
3190         const char *dev_name_source;
3191         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3192         char *p_name;
3193
3194         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3195         if (!t)
3196                 goto err;
3197
3198         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3199                 t->neigh_vars[i].data += (long) p;
3200                 t->neigh_vars[i].extra1 = dev;
3201                 t->neigh_vars[i].extra2 = p;
3202         }
3203
3204         if (dev) {
3205                 dev_name_source = dev->name;
3206                 /* Terminate the table early */
3207                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3208                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3209         } else {
3210                 struct neigh_table *tbl = p->tbl;
3211                 dev_name_source = "default";
3212                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3213                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3214                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3215                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3216         }
3217
3218         if (handler) {
3219                 /* RetransTime */
3220                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3221                 /* ReachableTime */
3222                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3223                 /* RetransTime (in milliseconds)*/
3224                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3225                 /* ReachableTime (in milliseconds) */
3226                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3227         } else {
3228                 /* Those handlers will update p->reachable_time after
3229                  * base_reachable_time(_ms) is set to ensure the new timer starts being
3230                  * applied after the next neighbour update instead of waiting for
3231                  * neigh_periodic_work to update its value (can be multiple minutes)
3232                  * So any handler that replaces them should do this as well
3233                  */
3234                 /* ReachableTime */
3235                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3236                         neigh_proc_base_reachable_time;
3237                 /* ReachableTime (in milliseconds) */
3238                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3239                         neigh_proc_base_reachable_time;
3240         }
3241
3242         /* Don't export sysctls to unprivileged users */
3243         if (neigh_parms_net(p)->user_ns != &init_user_ns)
3244                 t->neigh_vars[0].procname = NULL;
3245
3246         switch (neigh_parms_family(p)) {
3247         case AF_INET:
3248               p_name = "ipv4";
3249               break;
3250         case AF_INET6:
3251               p_name = "ipv6";
3252               break;
3253         default:
3254               BUG();
3255         }
3256
3257         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3258                 p_name, dev_name_source);
3259         t->sysctl_header =
3260                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3261         if (!t->sysctl_header)
3262                 goto free;
3263
3264         p->sysctl_table = t;
3265         return 0;
3266
3267 free:
3268         kfree(t);
3269 err:
3270         return -ENOBUFS;
3271 }
3272 EXPORT_SYMBOL(neigh_sysctl_register);
3273
3274 void neigh_sysctl_unregister(struct neigh_parms *p)
3275 {
3276         if (p->sysctl_table) {
3277                 struct neigh_sysctl_table *t = p->sysctl_table;
3278                 p->sysctl_table = NULL;
3279                 unregister_net_sysctl_table(t->sysctl_header);
3280                 kfree(t);
3281         }
3282 }
3283 EXPORT_SYMBOL(neigh_sysctl_unregister);
3284
3285 #endif  /* CONFIG_SYSCTL */
3286
3287 static int __init neigh_init(void)
3288 {
3289         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3290         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3291         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3292
3293         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3294                       0);
3295         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3296
3297         return 0;
3298 }
3299
3300 subsys_initcall(neigh_init);