GNU Linux-libre 4.14.251-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
#define DEBUG
/* Highest verbosity level for which neigh_dbg() emits a message. */
#define NEIGH_DEBUG 1

/*
 * neigh_dbg - print a debug message when @level does not exceed NEIGH_DEBUG.
 * @level: verbosity level of this message
 * @fmt:   printf-style format, followed by its arguments
 *
 * @level is parenthesised so that an expression argument (for example a
 * conditional expression) is compared as a whole rather than being
 * re-associated with the <= operator.
 */
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if ((level) <= NEIGH_DEBUG)             \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)
53
/* Mask applied by pneigh_hash() to fold a hash value into a proxy bucket index. */
#define PNEIGH_HASHMASK         0xF

/* Forward declarations for helpers that are referenced before their definitions. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags,
                           u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                                    struct net_device *dev);

#ifdef CONFIG_PROC_FS
/* Only declared when procfs support is configured; defined outside this view. */
static const struct file_operations neigh_stat_seq_fops;
#endif
66
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks into the neighbour tables.
 */
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
107         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
108         neigh_release(neigh);
109 }
110
/*
 * Return a random value distributed over the interval
 * (1/2)*base ... (3/2)*base, or 0 when @base is 0 (which also avoids a
 * modulo by zero).
 *
 * The distribution corresponds to the default IPv6 settings and is not
 * overridable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
124 static bool neigh_del(struct neighbour *n, __u8 state,
125                       struct neighbour __rcu **np, struct neigh_table *tbl)
126 {
127         bool retval = false;
128
129         write_lock(&n->lock);
130         if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
131                 struct neighbour *neigh;
132
133                 neigh = rcu_dereference_protected(n->next,
134                                                   lockdep_is_held(&tbl->lock));
135                 rcu_assign_pointer(*np, neigh);
136                 n->dead = 1;
137                 retval = true;
138         }
139         write_unlock(&n->lock);
140         if (retval)
141                 neigh_cleanup_and_release(n);
142         return retval;
143 }
144
145 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
146 {
147         struct neigh_hash_table *nht;
148         void *pkey = ndel->primary_key;
149         u32 hash_val;
150         struct neighbour *n;
151         struct neighbour __rcu **np;
152
153         nht = rcu_dereference_protected(tbl->nht,
154                                         lockdep_is_held(&tbl->lock));
155         hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
156         hash_val = hash_val >> (32 - nht->hash_shift);
157
158         np = &nht->hash_buckets[hash_val];
159         while ((n = rcu_dereference_protected(*np,
160                                               lockdep_is_held(&tbl->lock)))) {
161                 if (n == ndel)
162                         return neigh_del(n, 0, np, tbl);
163                 np = &n->next;
164         }
165         return false;
166 }
167
168 static int neigh_forced_gc(struct neigh_table *tbl)
169 {
170         int shrunk = 0;
171         int i;
172         struct neigh_hash_table *nht;
173
174         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
175
176         write_lock_bh(&tbl->lock);
177         nht = rcu_dereference_protected(tbl->nht,
178                                         lockdep_is_held(&tbl->lock));
179         for (i = 0; i < (1 << nht->hash_shift); i++) {
180                 struct neighbour *n;
181                 struct neighbour __rcu **np;
182
183                 np = &nht->hash_buckets[i];
184                 while ((n = rcu_dereference_protected(*np,
185                                         lockdep_is_held(&tbl->lock))) != NULL) {
186                         /* Neighbour record may be discarded if:
187                          * - nobody refers to it.
188                          * - it is not permanent
189                          */
190                         if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
191                                 shrunk = 1;
192                                 continue;
193                         }
194                         np = &n->next;
195                 }
196         }
197
198         tbl->last_flush = jiffies;
199
200         write_unlock_bh(&tbl->lock);
201
202         return shrunk;
203 }
204
205 static void neigh_add_timer(struct neighbour *n, unsigned long when)
206 {
207         neigh_hold(n);
208         if (unlikely(mod_timer(&n->timer, when))) {
209                 printk("NEIGH: BUG, double timer add, state is %x\n",
210                        n->nud_state);
211                 dump_stack();
212         }
213 }
214
215 static int neigh_del_timer(struct neighbour *n)
216 {
217         if ((n->nud_state & NUD_IN_TIMER) &&
218             del_timer(&n->timer)) {
219                 neigh_release(n);
220                 return 1;
221         }
222         return 0;
223 }
224
225 static void pneigh_queue_purge(struct sk_buff_head *list)
226 {
227         struct sk_buff *skb;
228
229         while ((skb = skb_dequeue(list)) != NULL) {
230                 dev_put(skb->dev);
231                 kfree_skb(skb);
232         }
233 }
234
235 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
236 {
237         int i;
238         struct neigh_hash_table *nht;
239
240         nht = rcu_dereference_protected(tbl->nht,
241                                         lockdep_is_held(&tbl->lock));
242
243         for (i = 0; i < (1 << nht->hash_shift); i++) {
244                 struct neighbour *n;
245                 struct neighbour __rcu **np = &nht->hash_buckets[i];
246
247                 while ((n = rcu_dereference_protected(*np,
248                                         lockdep_is_held(&tbl->lock))) != NULL) {
249                         if (dev && n->dev != dev) {
250                                 np = &n->next;
251                                 continue;
252                         }
253                         rcu_assign_pointer(*np,
254                                    rcu_dereference_protected(n->next,
255                                                 lockdep_is_held(&tbl->lock)));
256                         write_lock(&n->lock);
257                         neigh_del_timer(n);
258                         n->dead = 1;
259
260                         if (refcount_read(&n->refcnt) != 1) {
261                                 /* The most unpleasant situation.
262                                    We must destroy neighbour entry,
263                                    but someone still uses it.
264
265                                    The destroy will be delayed until
266                                    the last user releases us, but
267                                    we must kill timers etc. and move
268                                    it to safe state.
269                                  */
270                                 __skb_queue_purge(&n->arp_queue);
271                                 n->arp_queue_len_bytes = 0;
272                                 n->output = neigh_blackhole;
273                                 if (n->nud_state & NUD_VALID)
274                                         n->nud_state = NUD_NOARP;
275                                 else
276                                         n->nud_state = NUD_NONE;
277                                 neigh_dbg(2, "neigh %p is stray\n", n);
278                         }
279                         write_unlock(&n->lock);
280                         neigh_cleanup_and_release(n);
281                 }
282         }
283 }
284
/*
 * Flush the neighbour entries of @dev from @tbl.
 *
 * Takes tbl->lock for writing (BH disabled) around neigh_flush_dev().
 * A NULL @dev makes neigh_flush_dev() match every entry.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
292
/*
 * Drop everything @tbl holds for @dev: neighbour entries, proxy entries and
 * the queued proxy packets.
 *
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        /* Removes the proxy entries and releases tbl->lock on our behalf. */
        pneigh_ifdown_and_unlock(tbl, dev);

        /* Stop the proxy timer before emptying the queue it works on. */
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
304
305 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
306 {
307         struct neighbour *n = NULL;
308         unsigned long now = jiffies;
309         int entries;
310
311         entries = atomic_inc_return(&tbl->entries) - 1;
312         if (entries >= tbl->gc_thresh3 ||
313             (entries >= tbl->gc_thresh2 &&
314              time_after(now, tbl->last_flush + 5 * HZ))) {
315                 if (!neigh_forced_gc(tbl) &&
316                     entries >= tbl->gc_thresh3) {
317                         net_info_ratelimited("%s: neighbor table overflow!\n",
318                                              tbl->id);
319                         NEIGH_CACHE_STAT_INC(tbl, table_fulls);
320                         goto out_entries;
321                 }
322         }
323
324         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
325         if (!n)
326                 goto out_entries;
327
328         __skb_queue_head_init(&n->arp_queue);
329         rwlock_init(&n->lock);
330         seqlock_init(&n->ha_lock);
331         n->updated        = n->used = now;
332         n->nud_state      = NUD_NONE;
333         n->output         = neigh_blackhole;
334         seqlock_init(&n->hh.hh_lock);
335         n->parms          = neigh_parms_clone(&tbl->parms);
336         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
337
338         NEIGH_CACHE_STAT_INC(tbl, allocs);
339         n->tbl            = tbl;
340         refcount_set(&n->refcnt, 1);
341         n->dead           = 1;
342 out:
343         return n;
344
345 out_entries:
346         atomic_dec(&tbl->entries);
347         goto out;
348 }
349
350 static void neigh_get_hash_rnd(u32 *x)
351 {
352         *x = get_random_u32() | 1;
353 }
354
355 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
356 {
357         size_t size = (1 << shift) * sizeof(struct neighbour *);
358         struct neigh_hash_table *ret;
359         struct neighbour __rcu **buckets;
360         int i;
361
362         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
363         if (!ret)
364                 return NULL;
365         if (size <= PAGE_SIZE) {
366                 buckets = kzalloc(size, GFP_ATOMIC);
367         } else {
368                 buckets = (struct neighbour __rcu **)
369                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
370                                            get_order(size));
371                 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
372         }
373         if (!buckets) {
374                 kfree(ret);
375                 return NULL;
376         }
377         ret->hash_buckets = buckets;
378         ret->hash_shift = shift;
379         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
380                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
381         return ret;
382 }
383
384 static void neigh_hash_free_rcu(struct rcu_head *head)
385 {
386         struct neigh_hash_table *nht = container_of(head,
387                                                     struct neigh_hash_table,
388                                                     rcu);
389         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
390         struct neighbour __rcu **buckets = nht->hash_buckets;
391
392         if (size <= PAGE_SIZE) {
393                 kfree(buckets);
394         } else {
395                 kmemleak_free(buckets);
396                 free_pages((unsigned long)buckets, get_order(size));
397         }
398         kfree(nht);
399 }
400
401 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
402                                                 unsigned long new_shift)
403 {
404         unsigned int i, hash;
405         struct neigh_hash_table *new_nht, *old_nht;
406
407         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
408
409         old_nht = rcu_dereference_protected(tbl->nht,
410                                             lockdep_is_held(&tbl->lock));
411         new_nht = neigh_hash_alloc(new_shift);
412         if (!new_nht)
413                 return old_nht;
414
415         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
416                 struct neighbour *n, *next;
417
418                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
419                                                    lockdep_is_held(&tbl->lock));
420                      n != NULL;
421                      n = next) {
422                         hash = tbl->hash(n->primary_key, n->dev,
423                                          new_nht->hash_rnd);
424
425                         hash >>= (32 - new_nht->hash_shift);
426                         next = rcu_dereference_protected(n->next,
427                                                 lockdep_is_held(&tbl->lock));
428
429                         rcu_assign_pointer(n->next,
430                                            rcu_dereference_protected(
431                                                 new_nht->hash_buckets[hash],
432                                                 lockdep_is_held(&tbl->lock)));
433                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
434                 }
435         }
436
437         rcu_assign_pointer(tbl->nht, new_nht);
438         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
439         return new_nht;
440 }
441
442 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
443                                struct net_device *dev)
444 {
445         struct neighbour *n;
446
447         NEIGH_CACHE_STAT_INC(tbl, lookups);
448
449         rcu_read_lock_bh();
450         n = __neigh_lookup_noref(tbl, pkey, dev);
451         if (n) {
452                 if (!refcount_inc_not_zero(&n->refcnt))
453                         n = NULL;
454                 NEIGH_CACHE_STAT_INC(tbl, hits);
455         }
456
457         rcu_read_unlock_bh();
458         return n;
459 }
460 EXPORT_SYMBOL(neigh_lookup);
461
462 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
463                                      const void *pkey)
464 {
465         struct neighbour *n;
466         int key_len = tbl->key_len;
467         u32 hash_val;
468         struct neigh_hash_table *nht;
469
470         NEIGH_CACHE_STAT_INC(tbl, lookups);
471
472         rcu_read_lock_bh();
473         nht = rcu_dereference_bh(tbl->nht);
474         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
475
476         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
477              n != NULL;
478              n = rcu_dereference_bh(n->next)) {
479                 if (!memcmp(n->primary_key, pkey, key_len) &&
480                     net_eq(dev_net(n->dev), net)) {
481                         if (!refcount_inc_not_zero(&n->refcnt))
482                                 n = NULL;
483                         NEIGH_CACHE_STAT_INC(tbl, hits);
484                         break;
485                 }
486         }
487
488         rcu_read_unlock_bh();
489         return n;
490 }
491 EXPORT_SYMBOL(neigh_lookup_nodev);
492
493 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
494                                  struct net_device *dev, bool want_ref)
495 {
496         u32 hash_val;
497         int key_len = tbl->key_len;
498         int error;
499         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
500         struct neigh_hash_table *nht;
501
502         if (!n) {
503                 rc = ERR_PTR(-ENOBUFS);
504                 goto out;
505         }
506
507         memcpy(n->primary_key, pkey, key_len);
508         n->dev = dev;
509         dev_hold(dev);
510
511         /* Protocol specific setup. */
512         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
513                 rc = ERR_PTR(error);
514                 goto out_neigh_release;
515         }
516
517         if (dev->netdev_ops->ndo_neigh_construct) {
518                 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
519                 if (error < 0) {
520                         rc = ERR_PTR(error);
521                         goto out_neigh_release;
522                 }
523         }
524
525         /* Device specific setup. */
526         if (n->parms->neigh_setup &&
527             (error = n->parms->neigh_setup(n)) < 0) {
528                 rc = ERR_PTR(error);
529                 goto out_neigh_release;
530         }
531
532         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
533
534         write_lock_bh(&tbl->lock);
535         nht = rcu_dereference_protected(tbl->nht,
536                                         lockdep_is_held(&tbl->lock));
537
538         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
539                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
540
541         hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
542
543         if (n->parms->dead) {
544                 rc = ERR_PTR(-EINVAL);
545                 goto out_tbl_unlock;
546         }
547
548         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
549                                             lockdep_is_held(&tbl->lock));
550              n1 != NULL;
551              n1 = rcu_dereference_protected(n1->next,
552                         lockdep_is_held(&tbl->lock))) {
553                 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
554                         if (want_ref)
555                                 neigh_hold(n1);
556                         rc = n1;
557                         goto out_tbl_unlock;
558                 }
559         }
560
561         n->dead = 0;
562         if (want_ref)
563                 neigh_hold(n);
564         rcu_assign_pointer(n->next,
565                            rcu_dereference_protected(nht->hash_buckets[hash_val],
566                                                      lockdep_is_held(&tbl->lock)));
567         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
568         write_unlock_bh(&tbl->lock);
569         neigh_dbg(2, "neigh %p is created\n", n);
570         rc = n;
571 out:
572         return rc;
573 out_tbl_unlock:
574         write_unlock_bh(&tbl->lock);
575 out_neigh_release:
576         neigh_release(n);
577         goto out;
578 }
579 EXPORT_SYMBOL(__neigh_create);
580
581 static u32 pneigh_hash(const void *pkey, int key_len)
582 {
583         u32 hash_val = *(u32 *)(pkey + key_len - 4);
584         hash_val ^= (hash_val >> 16);
585         hash_val ^= hash_val >> 8;
586         hash_val ^= hash_val >> 4;
587         hash_val &= PNEIGH_HASHMASK;
588         return hash_val;
589 }
590
591 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
592                                               struct net *net,
593                                               const void *pkey,
594                                               int key_len,
595                                               struct net_device *dev)
596 {
597         while (n) {
598                 if (!memcmp(n->key, pkey, key_len) &&
599                     net_eq(pneigh_net(n), net) &&
600                     (n->dev == dev || !n->dev))
601                         return n;
602                 n = n->next;
603         }
604         return NULL;
605 }
606
607 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
608                 struct net *net, const void *pkey, struct net_device *dev)
609 {
610         int key_len = tbl->key_len;
611         u32 hash_val = pneigh_hash(pkey, key_len);
612
613         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
614                                  net, pkey, key_len, dev);
615 }
616 EXPORT_SYMBOL_GPL(__pneigh_lookup);
617
618 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
619                                     struct net *net, const void *pkey,
620                                     struct net_device *dev, int creat)
621 {
622         struct pneigh_entry *n;
623         int key_len = tbl->key_len;
624         u32 hash_val = pneigh_hash(pkey, key_len);
625
626         read_lock_bh(&tbl->lock);
627         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
628                               net, pkey, key_len, dev);
629         read_unlock_bh(&tbl->lock);
630
631         if (n || !creat)
632                 goto out;
633
634         ASSERT_RTNL();
635
636         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
637         if (!n)
638                 goto out;
639
640         write_pnet(&n->net, net);
641         memcpy(n->key, pkey, key_len);
642         n->dev = dev;
643         if (dev)
644                 dev_hold(dev);
645
646         if (tbl->pconstructor && tbl->pconstructor(n)) {
647                 if (dev)
648                         dev_put(dev);
649                 kfree(n);
650                 n = NULL;
651                 goto out;
652         }
653
654         write_lock_bh(&tbl->lock);
655         n->next = tbl->phash_buckets[hash_val];
656         tbl->phash_buckets[hash_val] = n;
657         write_unlock_bh(&tbl->lock);
658 out:
659         return n;
660 }
661 EXPORT_SYMBOL(pneigh_lookup);
662
663
664 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
665                   struct net_device *dev)
666 {
667         struct pneigh_entry *n, **np;
668         int key_len = tbl->key_len;
669         u32 hash_val = pneigh_hash(pkey, key_len);
670
671         write_lock_bh(&tbl->lock);
672         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
673              np = &n->next) {
674                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
675                     net_eq(pneigh_net(n), net)) {
676                         *np = n->next;
677                         write_unlock_bh(&tbl->lock);
678                         if (tbl->pdestructor)
679                                 tbl->pdestructor(n);
680                         if (n->dev)
681                                 dev_put(n->dev);
682                         kfree(n);
683                         return 0;
684                 }
685         }
686         write_unlock_bh(&tbl->lock);
687         return -ENOENT;
688 }
689
690 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
691                                     struct net_device *dev)
692 {
693         struct pneigh_entry *n, **np, *freelist = NULL;
694         u32 h;
695
696         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
697                 np = &tbl->phash_buckets[h];
698                 while ((n = *np) != NULL) {
699                         if (!dev || n->dev == dev) {
700                                 *np = n->next;
701                                 n->next = freelist;
702                                 freelist = n;
703                                 continue;
704                         }
705                         np = &n->next;
706                 }
707         }
708         write_unlock_bh(&tbl->lock);
709         while ((n = freelist)) {
710                 freelist = n->next;
711                 n->next = NULL;
712                 if (tbl->pdestructor)
713                         tbl->pdestructor(n);
714                 if (n->dev)
715                         dev_put(n->dev);
716                 kfree(n);
717         }
718         return -ENOENT;
719 }
720
721 static void neigh_parms_destroy(struct neigh_parms *parms);
722
723 static inline void neigh_parms_put(struct neigh_parms *parms)
724 {
725         if (refcount_dec_and_test(&parms->refcnt))
726                 neigh_parms_destroy(parms);
727 }
728
729 /*
730  *      neighbour must already be out of the table;
731  *
732  */
733 void neigh_destroy(struct neighbour *neigh)
734 {
735         struct net_device *dev = neigh->dev;
736
737         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
738
739         if (!neigh->dead) {
740                 pr_warn("Destroying alive neighbour %p\n", neigh);
741                 dump_stack();
742                 return;
743         }
744
745         if (neigh_del_timer(neigh))
746                 pr_warn("Impossible event\n");
747
748         write_lock_bh(&neigh->lock);
749         __skb_queue_purge(&neigh->arp_queue);
750         write_unlock_bh(&neigh->lock);
751         neigh->arp_queue_len_bytes = 0;
752
753         if (dev->netdev_ops->ndo_neigh_destroy)
754                 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
755
756         dev_put(dev);
757         neigh_parms_put(neigh->parms);
758
759         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
760
761         atomic_dec(&neigh->tbl->entries);
762         kfree_rcu(neigh, rcu);
763 }
764 EXPORT_SYMBOL(neigh_destroy);
765
766 /* Neighbour state is suspicious;
767    disable fast path.
768
769    Called with write_locked neigh.
770  */
771 static void neigh_suspect(struct neighbour *neigh)
772 {
773         neigh_dbg(2, "neigh %p is suspected\n", neigh);
774
775         neigh->output = neigh->ops->output;
776 }
777
778 /* Neighbour state is OK;
779    enable fast path.
780
781    Called with write_locked neigh.
782  */
783 static void neigh_connect(struct neighbour *neigh)
784 {
785         neigh_dbg(2, "neigh %p is connected\n", neigh);
786
787         neigh->output = neigh->ops->connected_output;
788 }
789
/*
 * Periodic garbage-collection work item of a neighbour table.
 *
 * With tbl->lock write-held it (1) re-randomises reachable_time of every
 * parms entry at most once per 300 * HZ jiffies and (2), once the table
 * holds at least gc_thresh1 entries, walks all hash buckets and unlinks
 * entries that nobody else references and that are either NUD_FAILED or
 * unused for longer than GC_STALETIME.  Entries in a permanent or
 * timer-driven state are never touched.  The work item re-queues itself
 * before returning.
 */
static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                list_for_each_entry(p, &tbl->parms_list, list)
                        p->reachable_time =
                                neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }

        /* Below the first threshold no entry is collected; just re-arm. */
        if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
                goto out;

        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        /* Permanent and timer-driven entries are skipped. */
                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        /* Keep 'used' from lagging behind 'confirmed'. */
                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        /*
                         * Collect the entry when the table holds the only
                         * reference and it is failed or stale.
                         */
                        if (refcount_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                /* Unlink; np stays put and now refers to the successor. */
                                *np = n->next;
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release lock here, even if hash table
                 * grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                /* The table may have been replaced while unlocked: reload it. */
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
out:
        /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
         * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
         * BASE_REACHABLE_TIME.
         */
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                              NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
        write_unlock_bh(&tbl->lock);
}
870
871 static __inline__ int neigh_max_probes(struct neighbour *n)
872 {
873         struct neigh_parms *p = n->parms;
874         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
875                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
876                 NEIGH_VAR(p, MCAST_PROBES));
877 }
878
/* Account a failed resolution for @neigh and get rid of its queued packets.
 *
 * Entered and left with neigh->lock held for writing; the lock is dropped
 * around every ops->error_report() call.  Packets are reported one by one
 * only while the entry is still exactly NUD_FAILED; whatever is left on
 * arp_queue afterwards is purged and the byte counter is reset.
 */
879 static void neigh_invalidate(struct neighbour *neigh)
880         __releases(neigh->lock)
881         __acquires(neigh->lock)
882 {
883         struct sk_buff *skb;
884
885         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
886         neigh_dbg(2, "neigh %p is failed\n", neigh);
887         neigh->updated = jiffies;
888
889         /* It is very thin place. report_unreachable is very complicated
890            routine. Particularly, it can hit the same neighbour entry!
891
892            So that, we try to be accurate and avoid dead loop. --ANK
893          */
            /* nud_state is re-tested on every iteration because the lock was
             * released during the previous error_report() call.
             */
894         while (neigh->nud_state == NUD_FAILED &&
895                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
896                 write_unlock(&neigh->lock);
897                 neigh->ops->error_report(neigh, skb);
898                 write_lock(&neigh->lock);
899         }
900         __skb_queue_purge(&neigh->arp_queue);
901         neigh->arp_queue_len_bytes = 0;
902 }
903
904 static void neigh_probe(struct neighbour *neigh)
905         __releases(neigh->lock)
906 {
907         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
908         /* keep skb alive even if arp_queue overflows */
909         if (skb)
910                 skb = skb_clone(skb, GFP_ATOMIC);
911         write_unlock(&neigh->lock);
912         if (neigh->ops->solicit)
913                 neigh->ops->solicit(neigh, skb);
914         atomic_inc(&neigh->probes);
915         kfree_skb(skb);
916 }
917
918 /* Called when a timer expires for a neighbour entry.
 *
 * @arg is the neighbour pointer cast to unsigned long.  Under neigh->lock
 * the handler advances the NUD state machine (REACHABLE -> DELAY or STALE,
 * DELAY -> REACHABLE or PROBE), fails the entry once the probe budget from
 * neigh_max_probes() is used up, re-arms the timer while the resulting
 * state is still in NUD_IN_TIMER and sends a probe for INCOMPLETE/PROBE
 * entries.  It ends by dropping one reference on the entry.
 */
919
920 static void neigh_timer_handler(unsigned long arg)
921 {
922         unsigned long now, next;
923         struct neighbour *neigh = (struct neighbour *)arg;
924         unsigned int state;
925         int notify = 0;
926
927         write_lock(&neigh->lock);
928
929         state = neigh->nud_state;
930         now = jiffies;
931         next = now + HZ;
932
            /* The state no longer wants a timer: just unlock and release. */
933         if (!(state & NUD_IN_TIMER))
934                 goto out;
935
936         if (state & NUD_REACHABLE) {
937                 if (time_before_eq(now,
938                                    neigh->confirmed + neigh->parms->reachable_time)) {
                            /* Confirmed recently enough: stay REACHABLE. */
939                         neigh_dbg(2, "neigh %p is still alive\n", neigh);
940                         next = neigh->confirmed + neigh->parms->reachable_time;
941                 } else if (time_before_eq(now,
942                                           neigh->used +
943                                           NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                            /* Not confirmed but used within DELAY_PROBE_TIME. */
944                         neigh_dbg(2, "neigh %p is delayed\n", neigh);
945                         neigh->nud_state = NUD_DELAY;
946                         neigh->updated = jiffies;
947                         neigh_suspect(neigh);
948                         next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
949                 } else {
                            /* Neither confirmed nor recently used. */
950                         neigh_dbg(2, "neigh %p is suspected\n", neigh);
951                         neigh->nud_state = NUD_STALE;
952                         neigh->updated = jiffies;
953                         neigh_suspect(neigh);
954                         notify = 1;
955                 }
956         } else if (state & NUD_DELAY) {
957                 if (time_before_eq(now,
958                                    neigh->confirmed +
959                                    NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                            /* A confirmation arrived during the delay window. */
960                         neigh_dbg(2, "neigh %p is now reachable\n", neigh);
961                         neigh->nud_state = NUD_REACHABLE;
962                         neigh->updated = jiffies;
963                         neigh_connect(neigh);
964                         notify = 1;
965                         next = neigh->confirmed + neigh->parms->reachable_time;
966                 } else {
                            /* No confirmation: start probing with a fresh counter. */
967                         neigh_dbg(2, "neigh %p is probed\n", neigh);
968                         neigh->nud_state = NUD_PROBE;
969                         neigh->updated = jiffies;
970                         atomic_set(&neigh->probes, 0);
971                         notify = 1;
972                         next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
973                 }
974         } else {
975                 /* NUD_PROBE|NUD_INCOMPLETE */
976                 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
977         }
978
            /* Probe budget exhausted: fail the entry and flush its queue. */
979         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
980             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
981                 neigh->nud_state = NUD_FAILED;
982                 notify = 1;
983                 neigh_invalidate(neigh);
984                 goto out;
985         }
986
987         if (neigh->nud_state & NUD_IN_TIMER) {
                    /* Never re-arm sooner than HZ/2 from now. */
988                 if (time_before(next, jiffies + HZ/2))
989                         next = jiffies + HZ/2;
                    /* mod_timer() returning 0 means the timer was not pending,
                     * so the newly armed timer gets a reference of its own.
                     */
990                 if (!mod_timer(&neigh->timer, next))
991                         neigh_hold(neigh);
992         }
            /* neigh_probe() releases neigh->lock itself; every other path
             * unlocks at 'out', which sits inside the else arm for that reason.
             */
993         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
994                 neigh_probe(neigh);
995         } else {
996 out:
997                 write_unlock(&neigh->lock);
998         }
999
1000         if (notify)
1001                 neigh_update_notify(neigh, 0);
1002
             /* NOTE(review): presumably the reference taken when this timer
              * was armed - confirm against neigh_add_timer().
              */
1003         neigh_release(neigh);
1004 }
1005
/* Start or continue address resolution for @neigh on behalf of @skb.
 *
 * Returns 0 without taking ownership of @skb (entry is CONNECTED, DELAY or
 * PROBE, or was STALE and has just been moved to DELAY).  Returns 1 after
 * @skb has been queued on neigh->arp_queue (entry is INCOMPLETE) or freed
 * (resolution cannot be started, or the entry is dead and not STALE).  A
 * NULL @skb is simply not queued.
 */
1006 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1007 {
1008         int rc;
1009         bool immediate_probe = false;
1010
1011         write_lock_bh(&neigh->lock);
1012
1013         rc = 0;
1014         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1015                 goto out_unlock_bh;
1016         if (neigh->dead)
1017                 goto out_dead;
1018
1019         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                     /* No resolution in progress yet: start one if any multicast
                      * or application probes are configured, else fail at once.
                      */
1020                 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1021                     NEIGH_VAR(neigh->parms, APP_PROBES)) {
1022                         unsigned long next, now = jiffies;
1023
                             /* NOTE(review): presetting probes to UCAST_PROBES
                              * presumably skips the unicast share of the budget
                              * for a first resolution - confirm.
                              */
1024                         atomic_set(&neigh->probes,
1025                                    NEIGH_VAR(neigh->parms, UCAST_PROBES));
1026                         neigh_del_timer(neigh);
1027                         neigh->nud_state     = NUD_INCOMPLETE;
1028                         neigh->updated = now;
1029                         next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1030                                          HZ/2);
1031                         neigh_add_timer(neigh, next);
1032                         immediate_probe = true;
1033                 } else {
1034                         neigh->nud_state = NUD_FAILED;
1035                         neigh->updated = jiffies;
1036                         write_unlock_bh(&neigh->lock);
1037
1038                         kfree_skb(skb);
1039                         return 1;
1040                 }
1041         } else if (neigh->nud_state & NUD_STALE) {
                     /* First use of a stale entry: arm the delay timer; rc stays 0. */
1042                 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1043                 neigh_del_timer(neigh);
1044                 neigh->nud_state = NUD_DELAY;
1045                 neigh->updated = jiffies;
1046                 neigh_add_timer(neigh, jiffies +
1047                                 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1048         }
1049
1050         if (neigh->nud_state == NUD_INCOMPLETE) {
1051                 if (skb) {
                             /* Drop the oldest queued packets until the new one
                              * fits within QUEUE_LEN_BYTES (or the queue is empty).
                              */
1052                         while (neigh->arp_queue_len_bytes + skb->truesize >
1053                                NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1054                                 struct sk_buff *buff;
1055
1056                                 buff = __skb_dequeue(&neigh->arp_queue);
1057                                 if (!buff)
1058                                         break;
1059                                 neigh->arp_queue_len_bytes -= buff->truesize;
1060                                 kfree_skb(buff);
1061                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1062                         }
1063                         skb_dst_force(skb);
1064                         __skb_queue_tail(&neigh->arp_queue, skb);
1065                         neigh->arp_queue_len_bytes += skb->truesize;
1066                 }
1067                 rc = 1;
1068         }
1069 out_unlock_bh:
             /* neigh_probe() releases neigh->lock but not BH, so the plain
              * write_unlock() plus local_bh_enable() pair covers both arms.
              */
1070         if (immediate_probe)
1071                 neigh_probe(neigh);
1072         else
1073                 write_unlock(&neigh->lock);
1074         local_bh_enable();
1075         return rc;
1076
1077 out_dead:
             /* A dead but STALE entry is treated like the rc == 0 case. */
1078         if (neigh->nud_state & NUD_STALE)
1079                 goto out_unlock_bh;
1080         write_unlock_bh(&neigh->lock);
1081         kfree_skb(skb);
1082         return 1;
1083 }
1084 EXPORT_SYMBOL(__neigh_event_send);
1085
1086 static void neigh_update_hhs(struct neighbour *neigh)
1087 {
1088         struct hh_cache *hh;
1089         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1090                 = NULL;
1091
1092         if (neigh->dev->header_ops)
1093                 update = neigh->dev->header_ops->cache_update;
1094
1095         if (update) {
1096                 hh = &neigh->hh;
1097                 if (READ_ONCE(hh->hh_len)) {
1098                         write_seqlock_bh(&hh->hh_lock);
1099                         update(hh, neigh->dev, neigh->ha);
1100                         write_sequnlock_bh(&hh->hh_lock);
1101                 }
1102         }
1103 }
1104
1105
1106
1107 /* Generic update routine.
1108    -- lladdr is new lladdr or NULL, if it is not supplied.
1109    -- new    is new state.
1110    -- flags
1111         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1112                                 if it is different.
1113         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1114                                 lladdr instead of overriding it
1115                                 if it is different.
1116         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1117
1118         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1119                                 NTF_ROUTER flag.
1120         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1121                                 a router.
1122
1123    Caller MUST hold reference count on the entry.
1124  */
1125
/* Returns 0 on success (including updates that end up changing nothing),
 * -EPERM for a non-admin update of a NOARP/PERMANENT entry or any update
 * of a dead entry, and -EINVAL when no lladdr is supplied for an entry
 * that has no valid address yet on a device with a non-zero addr_len.
 * Listeners are notified through neigh_update_notify() after the lock is
 * dropped whenever the state or the address changed.
 */
1126 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1127                  u32 flags, u32 nlmsg_pid)
1128 {
1129         u8 old;
1130         int err;
1131         int notify = 0;
1132         struct net_device *dev;
1133         int update_isrouter = 0;
1134
1135         write_lock_bh(&neigh->lock);
1136
1137         dev    = neigh->dev;
1138         old    = neigh->nud_state;
1139         err    = -EPERM;
1140
1141         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1142             (old & (NUD_NOARP | NUD_PERMANENT)))
1143                 goto out;
1144         if (neigh->dead)
1145                 goto out;
1146
             /* New state carries no usable address: stop the timer, store the
              * state and, when a resolution in progress is being failed,
              * flush the pending packets.
              */
1147         if (!(new & NUD_VALID)) {
1148                 neigh_del_timer(neigh);
1149                 if (old & NUD_CONNECTED)
1150                         neigh_suspect(neigh);
1151                 neigh->nud_state = new;
1152                 err = 0;
1153                 notify = old & NUD_VALID;
1154                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1155                     (new & NUD_FAILED)) {
1156                         neigh_invalidate(neigh);
1157                         notify = 1;
1158                 }
1159                 goto out;
1160         }
1161
             /* From here on "lladdr == neigh->ha" (pointer equality) is the
              * marker for "address unchanged".
              */
1162         /* Compare new lladdr with cached one */
1163         if (!dev->addr_len) {
1164                 /* First case: device needs no address. */
1165                 lladdr = neigh->ha;
1166         } else if (lladdr) {
1167                 /* The second case: if something is already cached
1168                    and a new address is proposed:
1169                    - compare new & old
1170                    - if they are different, check override flag
1171                  */
1172                 if ((old & NUD_VALID) &&
1173                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1174                         lladdr = neigh->ha;
1175         } else {
1176                 /* No address is supplied; if we know something,
1177                    use it, otherwise discard the request.
1178                  */
1179                 err = -EINVAL;
1180                 if (!(old & NUD_VALID))
1181                         goto out;
1182                 lladdr = neigh->ha;
1183         }
1184
1185         /* Update confirmed timestamp for neighbour entry after we
1186          * received ARP packet even if it doesn't change IP to MAC binding.
1187          */
1188         if (new & NUD_CONNECTED)
1189                 neigh->confirmed = jiffies;
1190
1191         /* If entry was valid and address is not changed,
1192            do not change entry state, if new one is STALE.
1193          */
1194         err = 0;
1195         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1196         if (old & NUD_VALID) {
1197                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                             /* Different address without permission to override:
                              * either only suspect the entry (weak override of a
                              * connected entry) or ignore the update altogether.
                              */
1198                         update_isrouter = 0;
1199                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1200                             (old & NUD_CONNECTED)) {
1201                                 lladdr = neigh->ha;
1202                                 new = NUD_STALE;
1203                         } else
1204                                 goto out;
1205                 } else {
1206                         if (lladdr == neigh->ha && new == NUD_STALE &&
1207                             !(flags & NEIGH_UPDATE_F_ADMIN))
1208                                 new = old;
1209                 }
1210         }
1211
1212         /* Update timestamp only once we know we will make a change to the
1213          * neighbour entry. Otherwise we risk to move the locktime window with
1214          * noop updates and ignore relevant ARP updates.
1215          */
1216         if (new != old || lladdr != neigh->ha)
1217                 neigh->updated = jiffies;
1218
1219         if (new != old) {
                     /* Re-arm the timer for the new state: REACHABLE expires
                      * after reachable_time, other timed states immediately.
                      */
1220                 neigh_del_timer(neigh);
1221                 if (new & NUD_PROBE)
1222                         atomic_set(&neigh->probes, 0);
1223                 if (new & NUD_IN_TIMER)
1224                         neigh_add_timer(neigh, (jiffies +
1225                                                 ((new & NUD_REACHABLE) ?
1226                                                  neigh->parms->reachable_time :
1227                                                  0)));
1228                 neigh->nud_state = new;
1229                 notify = 1;
1230         }
1231
1232         if (lladdr != neigh->ha) {
                     /* Install the new address under ha_lock and refresh the
                      * cached hardware header.
                      */
1233                 write_seqlock(&neigh->ha_lock);
1234                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1235                 write_sequnlock(&neigh->ha_lock);
1236                 neigh_update_hhs(neigh);
1237                 if (!(new & NUD_CONNECTED))
1238                         neigh->confirmed = jiffies -
1239                                       (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1240                 notify = 1;
1241         }
1242         if (new == old)
1243                 goto out;
1244         if (new & NUD_CONNECTED)
1245                 neigh_connect(neigh);
1246         else
1247                 neigh_suspect(neigh);
1248         if (!(old & NUD_VALID)) {
1249                 struct sk_buff *skb;
1250
1251                 /* Again: avoid dead loop if something went wrong */
1252
                     /* The entry just became valid: send out the packets that
                      * were queued while it was unresolved.  The lock is
                      * dropped around each output call, hence the state check
                      * on every iteration.
                      */
1253                 while (neigh->nud_state & NUD_VALID &&
1254                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1255                         struct dst_entry *dst = skb_dst(skb);
1256                         struct neighbour *n2, *n1 = neigh;
1257                         write_unlock_bh(&neigh->lock);
1258
1259                         rcu_read_lock();
1260
1261                         /* Why not just use 'neigh' as-is?  The problem is that
1262                          * things such as shaper, eql, and sch_teql can end up
1263                          * using alternative, different, neigh objects to output
1264                          * the packet in the output path.  So what we need to do
1265                          * here is re-lookup the top-level neigh in the path so
1266                          * we can reinject the packet there.
1267                          */
1268                         n2 = NULL;
1269                         if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1270                                 n2 = dst_neigh_lookup_skb(dst, skb);
1271                                 if (n2)
1272                                         n1 = n2;
1273                         }
1274                         n1->output(n1, skb);
1275                         if (n2)
1276                                 neigh_release(n2);
1277                         rcu_read_unlock();
1278
1279                         write_lock_bh(&neigh->lock);
1280                 }
1281                 __skb_queue_purge(&neigh->arp_queue);
1282                 neigh->arp_queue_len_bytes = 0;
1283         }
1284 out:
             /* update_isrouter is only non-zero once the address checks above
              * have been reached and the update was not rejected.
              */
1285         if (update_isrouter) {
1286                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1287                         (neigh->flags | NTF_ROUTER) :
1288                         (neigh->flags & ~NTF_ROUTER);
1289         }
1290         write_unlock_bh(&neigh->lock);
1291
1292         if (notify)
1293                 neigh_update_notify(neigh, nlmsg_pid);
1294
1295         return err;
1296 }
1297 EXPORT_SYMBOL(neigh_update);
1298
1299 /* Update the neigh to listen temporarily for probe responses, even if it is
1300  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1301  */
1302 void __neigh_set_probe_once(struct neighbour *neigh)
1303 {
1304         if (neigh->dead)
1305                 return;
1306         neigh->updated = jiffies;
1307         if (!(neigh->nud_state & NUD_FAILED))
1308                 return;
1309         neigh->nud_state = NUD_INCOMPLETE;
1310         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1311         neigh_add_timer(neigh,
1312                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1313 }
1314 EXPORT_SYMBOL(__neigh_set_probe_once);
1315
1316 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1317                                  u8 *lladdr, void *saddr,
1318                                  struct net_device *dev)
1319 {
1320         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1321                                                  lladdr || !dev->addr_len);
1322         if (neigh)
1323                 neigh_update(neigh, lladdr, NUD_STALE,
1324                              NEIGH_UPDATE_F_OVERRIDE, 0);
1325         return neigh;
1326 }
1327 EXPORT_SYMBOL(neigh_event_ns);
1328
1329 /* Fill in the cached hardware header of @n through the device's
 * header_ops->cache() hook if it has not been filled in yet.
 *
 * Takes n->lock for writing itself, so the caller must not already hold
 * that lock.  header_ops and its cache hook are dereferenced without a
 * NULL check; the caller has to make sure both exist.
 */
1330 static void neigh_hh_init(struct neighbour *n)
1331 {
1332         struct net_device *dev = n->dev;
1333         __be16 prot = n->tbl->protocol;
1334         struct hh_cache *hh = &n->hh;
1335
1336         write_lock_bh(&n->lock);
1337
1338         /* Only one thread can come in here and initialize the
1339          * hh_cache entry.
1340          */
             /* hh_len is re-checked under the lock: another CPU may have
              * initialised the header between the caller's test and here.
              */
1341         if (!hh->hh_len)
1342                 dev->header_ops->cache(n, hh, prot);
1343
1344         write_unlock_bh(&n->lock);
1345 }
1346
1347 /* Slow and careful. */
1348
1349 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1350 {
1351         int rc = 0;
1352
1353         if (!neigh_event_send(neigh, skb)) {
1354                 int err;
1355                 struct net_device *dev = neigh->dev;
1356                 unsigned int seq;
1357
1358                 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1359                         neigh_hh_init(neigh);
1360
1361                 do {
1362                         __skb_pull(skb, skb_network_offset(skb));
1363                         seq = read_seqbegin(&neigh->ha_lock);
1364                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1365                                               neigh->ha, NULL, skb->len);
1366                 } while (read_seqretry(&neigh->ha_lock, seq));
1367
1368                 if (err >= 0)
1369                         rc = dev_queue_xmit(skb);
1370                 else
1371                         goto out_kfree_skb;
1372         }
1373 out:
1374         return rc;
1375 out_kfree_skb:
1376         rc = -EINVAL;
1377         kfree_skb(skb);
1378         goto out;
1379 }
1380 EXPORT_SYMBOL(neigh_resolve_output);
1381
1382 /* As fast as possible without hh cache */
1383
1384 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1385 {
1386         struct net_device *dev = neigh->dev;
1387         unsigned int seq;
1388         int err;
1389
1390         do {
1391                 __skb_pull(skb, skb_network_offset(skb));
1392                 seq = read_seqbegin(&neigh->ha_lock);
1393                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1394                                       neigh->ha, NULL, skb->len);
1395         } while (read_seqretry(&neigh->ha_lock, seq));
1396
1397         if (err >= 0)
1398                 err = dev_queue_xmit(skb);
1399         else {
1400                 err = -EINVAL;
1401                 kfree_skb(skb);
1402         }
1403         return err;
1404 }
1405 EXPORT_SYMBOL(neigh_connected_output);
1406
/* Output path that adds no link-layer header: @skb goes straight to
 * dev_queue_xmit() and its result is returned.  @neigh is not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1412
1413 static void neigh_proxy_process(unsigned long arg)
1414 {
1415         struct neigh_table *tbl = (struct neigh_table *)arg;
1416         long sched_next = 0;
1417         unsigned long now = jiffies;
1418         struct sk_buff *skb, *n;
1419
1420         spin_lock(&tbl->proxy_queue.lock);
1421
1422         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1423                 long tdif = NEIGH_CB(skb)->sched_next - now;
1424
1425                 if (tdif <= 0) {
1426                         struct net_device *dev = skb->dev;
1427
1428                         __skb_unlink(skb, &tbl->proxy_queue);
1429                         if (tbl->proxy_redo && netif_running(dev)) {
1430                                 rcu_read_lock();
1431                                 tbl->proxy_redo(skb);
1432                                 rcu_read_unlock();
1433                         } else {
1434                                 kfree_skb(skb);
1435                         }
1436
1437                         dev_put(dev);
1438                 } else if (!sched_next || tdif < sched_next)
1439                         sched_next = tdif;
1440         }
1441         del_timer(&tbl->proxy_timer);
1442         if (sched_next)
1443                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1444         spin_unlock(&tbl->proxy_queue.lock);
1445 }
1446
1447 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1448                     struct sk_buff *skb)
1449 {
1450         unsigned long now = jiffies;
1451
1452         unsigned long sched_next = now + (prandom_u32() %
1453                                           NEIGH_VAR(p, PROXY_DELAY));
1454
1455         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1456                 kfree_skb(skb);
1457                 return;
1458         }
1459
1460         NEIGH_CB(skb)->sched_next = sched_next;
1461         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1462
1463         spin_lock(&tbl->proxy_queue.lock);
1464         if (del_timer(&tbl->proxy_timer)) {
1465                 if (time_before(tbl->proxy_timer.expires, sched_next))
1466                         sched_next = tbl->proxy_timer.expires;
1467         }
1468         skb_dst_drop(skb);
1469         dev_hold(skb->dev);
1470         __skb_queue_tail(&tbl->proxy_queue, skb);
1471         mod_timer(&tbl->proxy_timer, sched_next);
1472         spin_unlock(&tbl->proxy_queue.lock);
1473 }
1474 EXPORT_SYMBOL(pneigh_enqueue);
1475
1476 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1477                                                       struct net *net, int ifindex)
1478 {
1479         struct neigh_parms *p;
1480
1481         list_for_each_entry(p, &tbl->parms_list, list) {
1482                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1483                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1484                         return p;
1485         }
1486
1487         return NULL;
1488 }
1489
1490 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1491                                       struct neigh_table *tbl)
1492 {
1493         struct neigh_parms *p;
1494         struct net *net = dev_net(dev);
1495         const struct net_device_ops *ops = dev->netdev_ops;
1496
1497         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1498         if (p) {
1499                 p->tbl            = tbl;
1500                 refcount_set(&p->refcnt, 1);
1501                 p->reachable_time =
1502                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1503                 dev_hold(dev);
1504                 p->dev = dev;
1505                 write_pnet(&p->net, net);
1506                 p->sysctl_table = NULL;
1507
1508                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1509                         dev_put(dev);
1510                         kfree(p);
1511                         return NULL;
1512                 }
1513
1514                 write_lock_bh(&tbl->lock);
1515                 list_add(&p->list, &tbl->parms.list);
1516                 write_unlock_bh(&tbl->lock);
1517
1518                 neigh_parms_data_state_cleanall(p);
1519         }
1520         return p;
1521 }
1522 EXPORT_SYMBOL(neigh_parms_alloc);
1523
1524 static void neigh_rcu_free_parms(struct rcu_head *head)
1525 {
1526         struct neigh_parms *parms =
1527                 container_of(head, struct neigh_parms, rcu_head);
1528
1529         neigh_parms_put(parms);
1530 }
1531
1532 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1533 {
1534         if (!parms || parms == &tbl->parms)
1535                 return;
1536         write_lock_bh(&tbl->lock);
1537         list_del(&parms->list);
1538         parms->dead = 1;
1539         write_unlock_bh(&tbl->lock);
1540         if (parms->dev)
1541                 dev_put(parms->dev);
1542         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1543 }
1544 EXPORT_SYMBOL(neigh_parms_release);
1545
/* Free the memory of @parms; nothing else is torn down here. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1550
/* Lock class key handed to skb_queue_head_init_class() for each table's
 * proxy_queue in neigh_table_init().
 */
1551 static struct lock_class_key neigh_table_proxy_queue_class;
1552
/* Registered neighbour tables, indexed by the slot passed to
 * neigh_table_init(); neigh_table_clear() resets a slot to NULL and
 * neigh_find_table() reads the NEIGH_ARP_TABLE, NEIGH_ND_TABLE and
 * NEIGH_DN_TABLE slots.
 */
1553 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1554
/* Initialise @tbl and publish it in slot @index of neigh_tables[].
 *
 * Any allocation or proc registration failure is fatal (panic).  @index is
 * used as an array index without a range check, so the caller must pass a
 * valid slot.
 */
1555 void neigh_table_init(int index, struct neigh_table *tbl)
1556 {
1557         unsigned long now = jiffies;
1558         unsigned long phsize;
1559
             /* The table's built-in default parms head the parms list and
              * belong to init_net.
              */
1560         INIT_LIST_HEAD(&tbl->parms_list);
1561         list_add(&tbl->parms.list, &tbl->parms_list);
1562         write_pnet(&tbl->parms.net, &init_net);
1563         refcount_set(&tbl->parms.refcnt, 1);
1564         tbl->parms.reachable_time =
1565                           neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1566
1567         tbl->stats = alloc_percpu(struct neigh_statistics);
1568         if (!tbl->stats)
1569                 panic("cannot create neighbour cache statistics");
1570
1571 #ifdef CONFIG_PROC_FS
             /* Statistics file named tbl->id under init_net's proc_net_stat. */
1572         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1573                               &neigh_stat_seq_fops, tbl))
1574                 panic("cannot create neighbour proc dir entry");
1575 #endif
1576
             /* NOTE(review): the argument 3 is presumably a size shift
              * (8 buckets) - confirm against neigh_hash_alloc().
              */
1577         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1578
             /* One bucket pointer per proxy hash value (PNEIGH_HASHMASK + 1). */
1579         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1580         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1581
1582         if (!tbl->nht || !tbl->phash_buckets)
1583                 panic("cannot allocate neighbour cache hashes");
1584
             /* Default entry size: the neighbour up to and including its key,
              * rounded up to NEIGH_PRIV_ALIGN; a preset size must already be
              * aligned.
              */
1585         if (!tbl->entry_size)
1586                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1587                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1588         else
1589                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1590
1591         rwlock_init(&tbl->lock);
             /* First periodic GC pass runs after one reachable_time. */
1592         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1593         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1594                         tbl->parms.reachable_time);
1595         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1596         skb_queue_head_init_class(&tbl->proxy_queue,
1597                         &neigh_table_proxy_queue_class);
1598
1599         tbl->last_flush = now;
1600         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1601
             /* Publish last, once the table is fully set up. */
1602         neigh_tables[index] = tbl;
1603 }
1604 EXPORT_SYMBOL(neigh_table_init);
1605
/* Unregister @tbl from slot @index and release what neigh_table_init()
 * set up.  Always returns 0; entries still accounted in tbl->entries after
 * the flush are only reported with pr_crit().
 */
1606 int neigh_table_clear(int index, struct neigh_table *tbl)
1607 {
             /* Unpublish first so neigh_find_table() stops returning it. */
1608         neigh_tables[index] = NULL;
1609         /* It is not clean... Fix it to unload IPv6 module safely */
             /* Stop the asynchronous users (GC work, proxy timer) before
              * emptying the proxy queue and the table.
              * NOTE(review): neigh_ifdown(tbl, NULL) presumably flushes the
              * entries of all devices - confirm.
              */
1610         cancel_delayed_work_sync(&tbl->gc_work);
1611         del_timer_sync(&tbl->proxy_timer);
1612         pneigh_queue_purge(&tbl->proxy_queue);
1613         neigh_ifdown(tbl, NULL);
1614         if (atomic_read(&tbl->entries))
1615                 pr_crit("neighbour leakage\n");
1616
             /* The hash table is freed from an RCU callback. */
1617         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1618                  neigh_hash_free_rcu);
1619         tbl->nht = NULL;
1620
1621         kfree(tbl->phash_buckets);
1622         tbl->phash_buckets = NULL;
1623
1624         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1625
1626         free_percpu(tbl->stats);
1627         tbl->stats = NULL;
1628
1629         return 0;
1630 }
1631 EXPORT_SYMBOL(neigh_table_clear);
1632
1633 static struct neigh_table *neigh_find_table(int family)
1634 {
1635         struct neigh_table *tbl = NULL;
1636
1637         switch (family) {
1638         case AF_INET:
1639                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1640                 break;
1641         case AF_INET6:
1642                 tbl = neigh_tables[NEIGH_ND_TABLE];
1643                 break;
1644         case AF_DECnet:
1645                 tbl = neigh_tables[NEIGH_DN_TABLE];
1646                 break;
1647         }
1648
1649         return tbl;
1650 }
1651
/* Netlink request handler that deletes the neighbour or proxy entry
 * described by @nlh.  Must run under RTNL.
 *
 * Returns -EINVAL for a short message, a missing or too short NDA_DST
 * attribute, or a non-proxy request without an interface; -ENODEV for an
 * unknown ifindex; -EAFNOSUPPORT for a family without a table; -ENOENT if
 * no entry matches; otherwise the result of pneigh_delete() (proxy
 * entries) or neigh_update() (regular entries).  @extack is not used.
 */
1652 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1653                         struct netlink_ext_ack *extack)
1654 {
1655         struct net *net = sock_net(skb->sk);
1656         struct ndmsg *ndm;
1657         struct nlattr *dst_attr;
1658         struct neigh_table *tbl;
1659         struct neighbour *neigh;
1660         struct net_device *dev = NULL;
1661         int err = -EINVAL;
1662
1663         ASSERT_RTNL();
1664         if (nlmsg_len(nlh) < sizeof(*ndm))
1665                 goto out;
1666
1667         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1668         if (dst_attr == NULL)
1669                 goto out;
1670
1671         ndm = nlmsg_data(nlh);
1672         if (ndm->ndm_ifindex) {
1673                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1674                 if (dev == NULL) {
1675                         err = -ENODEV;
1676                         goto out;
1677                 }
1678         }
1679
1680         tbl = neigh_find_table(ndm->ndm_family);
1681         if (tbl == NULL)
1682                 return -EAFNOSUPPORT;
1683
             /* The destination attribute must hold at least a full key. */
1684         if (nla_len(dst_attr) < tbl->key_len)
1685                 goto out;
1686
             /* Proxy entries may be deleted without naming an interface. */
1687         if (ndm->ndm_flags & NTF_PROXY) {
1688                 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1689                 goto out;
1690         }
1691
1692         if (dev == NULL)
1693                 goto out;
1694
1695         neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1696         if (neigh == NULL) {
1697                 err = -ENOENT;
1698                 goto out;
1699         }
1700
             /* Administratively force the entry to NUD_FAILED ... */
1701         err = neigh_update(neigh, NULL, NUD_FAILED,
1702                            NEIGH_UPDATE_F_OVERRIDE |
1703                            NEIGH_UPDATE_F_ADMIN,
1704                            NETLINK_CB(skb).portid);
             /* ... then drop the lookup reference and try to unlink the entry
              * under tbl->lock.
              * NOTE(review): neigh_remove_one() presumably removes the entry
              * only if the table now holds the last reference - confirm.
              */
1705         write_lock_bh(&tbl->lock);
1706         neigh_release(neigh);
1707         neigh_remove_one(neigh, tbl);
1708         write_unlock_bh(&tbl->lock);
1709
1710 out:
1711         return err;
1712 }
1713
1714 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1715                      struct netlink_ext_ack *extack)
1716 {
1717         int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1718         struct net *net = sock_net(skb->sk);
1719         struct ndmsg *ndm;
1720         struct nlattr *tb[NDA_MAX+1];
1721         struct neigh_table *tbl;
1722         struct net_device *dev = NULL;
1723         struct neighbour *neigh;
1724         void *dst, *lladdr;
1725         int err;
1726
1727         ASSERT_RTNL();
1728         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1729         if (err < 0)
1730                 goto out;
1731
1732         err = -EINVAL;
1733         if (tb[NDA_DST] == NULL)
1734                 goto out;
1735
1736         ndm = nlmsg_data(nlh);
1737         if (ndm->ndm_ifindex) {
1738                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1739                 if (dev == NULL) {
1740                         err = -ENODEV;
1741                         goto out;
1742                 }
1743
1744                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1745                         goto out;
1746         }
1747
1748         tbl = neigh_find_table(ndm->ndm_family);
1749         if (tbl == NULL)
1750                 return -EAFNOSUPPORT;
1751
1752         if (nla_len(tb[NDA_DST]) < tbl->key_len)
1753                 goto out;
1754         dst = nla_data(tb[NDA_DST]);
1755         lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1756
1757         if (ndm->ndm_flags & NTF_PROXY) {
1758                 struct pneigh_entry *pn;
1759
1760                 err = -ENOBUFS;
1761                 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1762                 if (pn) {
1763                         pn->flags = ndm->ndm_flags;
1764                         err = 0;
1765                 }
1766                 goto out;
1767         }
1768
1769         if (dev == NULL)
1770                 goto out;
1771
1772         neigh = neigh_lookup(tbl, dst, dev);
1773         if (neigh == NULL) {
1774                 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1775                         err = -ENOENT;
1776                         goto out;
1777                 }
1778
1779                 neigh = __neigh_lookup_errno(tbl, dst, dev);
1780                 if (IS_ERR(neigh)) {
1781                         err = PTR_ERR(neigh);
1782                         goto out;
1783                 }
1784         } else {
1785                 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1786                         err = -EEXIST;
1787                         neigh_release(neigh);
1788                         goto out;
1789                 }
1790
1791                 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1792                         flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1793         }
1794
1795         if (ndm->ndm_flags & NTF_USE) {
1796                 neigh_event_send(neigh, NULL);
1797                 err = 0;
1798         } else
1799                 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1800                                    NETLINK_CB(skb).portid);
1801         neigh_release(neigh);
1802
1803 out:
1804         return err;
1805 }
1806
1807 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1808 {
1809         struct nlattr *nest;
1810
1811         nest = nla_nest_start(skb, NDTA_PARMS);
1812         if (nest == NULL)
1813                 return -ENOBUFS;
1814
1815         if ((parms->dev &&
1816              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1817             nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1818             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1819                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1820             /* approximative value for deprecated QUEUE_LEN (in packets) */
1821             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1822                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1823             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1824             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1825             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1826                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1827             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1828                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1829             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1830                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1831             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1832                           NDTPA_PAD) ||
1833             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1834                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1835             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1836                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1837             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1838                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1839             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1840                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1841             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1842                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1843             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1844                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1845             nla_put_msecs(skb, NDTPA_LOCKTIME,
1846                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1847                 goto nla_put_failure;
1848         return nla_nest_end(skb, nest);
1849
1850 nla_put_failure:
1851         nla_nest_cancel(skb, nest);
1852         return -EMSGSIZE;
1853 }
1854
1855 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1856                               u32 pid, u32 seq, int type, int flags)
1857 {
1858         struct nlmsghdr *nlh;
1859         struct ndtmsg *ndtmsg;
1860
1861         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1862         if (nlh == NULL)
1863                 return -EMSGSIZE;
1864
1865         ndtmsg = nlmsg_data(nlh);
1866
1867         read_lock_bh(&tbl->lock);
1868         ndtmsg->ndtm_family = tbl->family;
1869         ndtmsg->ndtm_pad1   = 0;
1870         ndtmsg->ndtm_pad2   = 0;
1871
1872         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1873             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1874             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1875             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1876             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1877                 goto nla_put_failure;
1878         {
1879                 unsigned long now = jiffies;
1880                 long flush_delta = now - tbl->last_flush;
1881                 long rand_delta = now - tbl->last_rand;
1882                 struct neigh_hash_table *nht;
1883                 struct ndt_config ndc = {
1884                         .ndtc_key_len           = tbl->key_len,
1885                         .ndtc_entry_size        = tbl->entry_size,
1886                         .ndtc_entries           = atomic_read(&tbl->entries),
1887                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1888                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1889                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1890                 };
1891
1892                 rcu_read_lock_bh();
1893                 nht = rcu_dereference_bh(tbl->nht);
1894                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1895                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1896                 rcu_read_unlock_bh();
1897
1898                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1899                         goto nla_put_failure;
1900         }
1901
1902         {
1903                 int cpu;
1904                 struct ndt_stats ndst;
1905
1906                 memset(&ndst, 0, sizeof(ndst));
1907
1908                 for_each_possible_cpu(cpu) {
1909                         struct neigh_statistics *st;
1910
1911                         st = per_cpu_ptr(tbl->stats, cpu);
1912                         ndst.ndts_allocs                += st->allocs;
1913                         ndst.ndts_destroys              += st->destroys;
1914                         ndst.ndts_hash_grows            += st->hash_grows;
1915                         ndst.ndts_res_failed            += st->res_failed;
1916                         ndst.ndts_lookups               += st->lookups;
1917                         ndst.ndts_hits                  += st->hits;
1918                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1919                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1920                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1921                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1922                         ndst.ndts_table_fulls           += st->table_fulls;
1923                 }
1924
1925                 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1926                                   NDTA_PAD))
1927                         goto nla_put_failure;
1928         }
1929
1930         BUG_ON(tbl->parms.dev);
1931         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1932                 goto nla_put_failure;
1933
1934         read_unlock_bh(&tbl->lock);
1935         nlmsg_end(skb, nlh);
1936         return 0;
1937
1938 nla_put_failure:
1939         read_unlock_bh(&tbl->lock);
1940         nlmsg_cancel(skb, nlh);
1941         return -EMSGSIZE;
1942 }
1943
1944 static int neightbl_fill_param_info(struct sk_buff *skb,
1945                                     struct neigh_table *tbl,
1946                                     struct neigh_parms *parms,
1947                                     u32 pid, u32 seq, int type,
1948                                     unsigned int flags)
1949 {
1950         struct ndtmsg *ndtmsg;
1951         struct nlmsghdr *nlh;
1952
1953         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1954         if (nlh == NULL)
1955                 return -EMSGSIZE;
1956
1957         ndtmsg = nlmsg_data(nlh);
1958
1959         read_lock_bh(&tbl->lock);
1960         ndtmsg->ndtm_family = tbl->family;
1961         ndtmsg->ndtm_pad1   = 0;
1962         ndtmsg->ndtm_pad2   = 0;
1963
1964         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1965             neightbl_fill_parms(skb, parms) < 0)
1966                 goto errout;
1967
1968         read_unlock_bh(&tbl->lock);
1969         nlmsg_end(skb, nlh);
1970         return 0;
1971 errout:
1972         read_unlock_bh(&tbl->lock);
1973         nlmsg_cancel(skb, nlh);
1974         return -EMSGSIZE;
1975 }
1976
1977 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1978         [NDTA_NAME]             = { .type = NLA_STRING },
1979         [NDTA_THRESH1]          = { .type = NLA_U32 },
1980         [NDTA_THRESH2]          = { .type = NLA_U32 },
1981         [NDTA_THRESH3]          = { .type = NLA_U32 },
1982         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1983         [NDTA_PARMS]            = { .type = NLA_NESTED },
1984 };
1985
1986 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1987         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1988         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1989         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1990         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1991         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1992         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1993         [NDTPA_MCAST_REPROBES]          = { .type = NLA_U32 },
1994         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1995         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1996         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1997         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1998         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1999         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
2000         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
2001 };
2002
2003 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2004                         struct netlink_ext_ack *extack)
2005 {
2006         struct net *net = sock_net(skb->sk);
2007         struct neigh_table *tbl;
2008         struct ndtmsg *ndtmsg;
2009         struct nlattr *tb[NDTA_MAX+1];
2010         bool found = false;
2011         int err, tidx;
2012
2013         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2014                           nl_neightbl_policy, extack);
2015         if (err < 0)
2016                 goto errout;
2017
2018         if (tb[NDTA_NAME] == NULL) {
2019                 err = -EINVAL;
2020                 goto errout;
2021         }
2022
2023         ndtmsg = nlmsg_data(nlh);
2024
2025         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2026                 tbl = neigh_tables[tidx];
2027                 if (!tbl)
2028                         continue;
2029                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2030                         continue;
2031                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2032                         found = true;
2033                         break;
2034                 }
2035         }
2036
2037         if (!found)
2038                 return -ENOENT;
2039
2040         /*
2041          * We acquire tbl->lock to be nice to the periodic timers and
2042          * make sure they always see a consistent set of values.
2043          */
2044         write_lock_bh(&tbl->lock);
2045
2046         if (tb[NDTA_PARMS]) {
2047                 struct nlattr *tbp[NDTPA_MAX+1];
2048                 struct neigh_parms *p;
2049                 int i, ifindex = 0;
2050
2051                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2052                                        nl_ntbl_parm_policy, extack);
2053                 if (err < 0)
2054                         goto errout_tbl_lock;
2055
2056                 if (tbp[NDTPA_IFINDEX])
2057                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2058
2059                 p = lookup_neigh_parms(tbl, net, ifindex);
2060                 if (p == NULL) {
2061                         err = -ENOENT;
2062                         goto errout_tbl_lock;
2063                 }
2064
2065                 for (i = 1; i <= NDTPA_MAX; i++) {
2066                         if (tbp[i] == NULL)
2067                                 continue;
2068
2069                         switch (i) {
2070                         case NDTPA_QUEUE_LEN:
2071                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2072                                               nla_get_u32(tbp[i]) *
2073                                               SKB_TRUESIZE(ETH_FRAME_LEN));
2074                                 break;
2075                         case NDTPA_QUEUE_LENBYTES:
2076                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2077                                               nla_get_u32(tbp[i]));
2078                                 break;
2079                         case NDTPA_PROXY_QLEN:
2080                                 NEIGH_VAR_SET(p, PROXY_QLEN,
2081                                               nla_get_u32(tbp[i]));
2082                                 break;
2083                         case NDTPA_APP_PROBES:
2084                                 NEIGH_VAR_SET(p, APP_PROBES,
2085                                               nla_get_u32(tbp[i]));
2086                                 break;
2087                         case NDTPA_UCAST_PROBES:
2088                                 NEIGH_VAR_SET(p, UCAST_PROBES,
2089                                               nla_get_u32(tbp[i]));
2090                                 break;
2091                         case NDTPA_MCAST_PROBES:
2092                                 NEIGH_VAR_SET(p, MCAST_PROBES,
2093                                               nla_get_u32(tbp[i]));
2094                                 break;
2095                         case NDTPA_MCAST_REPROBES:
2096                                 NEIGH_VAR_SET(p, MCAST_REPROBES,
2097                                               nla_get_u32(tbp[i]));
2098                                 break;
2099                         case NDTPA_BASE_REACHABLE_TIME:
2100                                 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2101                                               nla_get_msecs(tbp[i]));
2102                                 /* update reachable_time as well, otherwise, the change will
2103                                  * only be effective after the next time neigh_periodic_work
2104                                  * decides to recompute it (can be multiple minutes)
2105                                  */
2106                                 p->reachable_time =
2107                                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2108                                 break;
2109                         case NDTPA_GC_STALETIME:
2110                                 NEIGH_VAR_SET(p, GC_STALETIME,
2111                                               nla_get_msecs(tbp[i]));
2112                                 break;
2113                         case NDTPA_DELAY_PROBE_TIME:
2114                                 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2115                                               nla_get_msecs(tbp[i]));
2116                                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2117                                 break;
2118                         case NDTPA_RETRANS_TIME:
2119                                 NEIGH_VAR_SET(p, RETRANS_TIME,
2120                                               nla_get_msecs(tbp[i]));
2121                                 break;
2122                         case NDTPA_ANYCAST_DELAY:
2123                                 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2124                                               nla_get_msecs(tbp[i]));
2125                                 break;
2126                         case NDTPA_PROXY_DELAY:
2127                                 NEIGH_VAR_SET(p, PROXY_DELAY,
2128                                               nla_get_msecs(tbp[i]));
2129                                 break;
2130                         case NDTPA_LOCKTIME:
2131                                 NEIGH_VAR_SET(p, LOCKTIME,
2132                                               nla_get_msecs(tbp[i]));
2133                                 break;
2134                         }
2135                 }
2136         }
2137
2138         err = -ENOENT;
2139         if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2140              tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2141             !net_eq(net, &init_net))
2142                 goto errout_tbl_lock;
2143
2144         if (tb[NDTA_THRESH1])
2145                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2146
2147         if (tb[NDTA_THRESH2])
2148                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2149
2150         if (tb[NDTA_THRESH3])
2151                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2152
2153         if (tb[NDTA_GC_INTERVAL])
2154                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2155
2156         err = 0;
2157
2158 errout_tbl_lock:
2159         write_unlock_bh(&tbl->lock);
2160 errout:
2161         return err;
2162 }
2163
2164 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2165 {
2166         struct net *net = sock_net(skb->sk);
2167         int family, tidx, nidx = 0;
2168         int tbl_skip = cb->args[0];
2169         int neigh_skip = cb->args[1];
2170         struct neigh_table *tbl;
2171
2172         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2173
2174         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2175                 struct neigh_parms *p;
2176
2177                 tbl = neigh_tables[tidx];
2178                 if (!tbl)
2179                         continue;
2180
2181                 if (tidx < tbl_skip || (family && tbl->family != family))
2182                         continue;
2183
2184                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2185                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2186                                        NLM_F_MULTI) < 0)
2187                         break;
2188
2189                 nidx = 0;
2190                 p = list_next_entry(&tbl->parms, list);
2191                 list_for_each_entry_from(p, &tbl->parms_list, list) {
2192                         if (!net_eq(neigh_parms_net(p), net))
2193                                 continue;
2194
2195                         if (nidx < neigh_skip)
2196                                 goto next;
2197
2198                         if (neightbl_fill_param_info(skb, tbl, p,
2199                                                      NETLINK_CB(cb->skb).portid,
2200                                                      cb->nlh->nlmsg_seq,
2201                                                      RTM_NEWNEIGHTBL,
2202                                                      NLM_F_MULTI) < 0)
2203                                 goto out;
2204                 next:
2205                         nidx++;
2206                 }
2207
2208                 neigh_skip = 0;
2209         }
2210 out:
2211         cb->args[0] = tidx;
2212         cb->args[1] = nidx;
2213
2214         return skb->len;
2215 }
2216
2217 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2218                            u32 pid, u32 seq, int type, unsigned int flags)
2219 {
2220         unsigned long now = jiffies;
2221         struct nda_cacheinfo ci;
2222         struct nlmsghdr *nlh;
2223         struct ndmsg *ndm;
2224
2225         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2226         if (nlh == NULL)
2227                 return -EMSGSIZE;
2228
2229         ndm = nlmsg_data(nlh);
2230         ndm->ndm_family  = neigh->ops->family;
2231         ndm->ndm_pad1    = 0;
2232         ndm->ndm_pad2    = 0;
2233         ndm->ndm_flags   = neigh->flags;
2234         ndm->ndm_type    = neigh->type;
2235         ndm->ndm_ifindex = neigh->dev->ifindex;
2236
2237         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2238                 goto nla_put_failure;
2239
2240         read_lock_bh(&neigh->lock);
2241         ndm->ndm_state   = neigh->nud_state;
2242         if (neigh->nud_state & NUD_VALID) {
2243                 char haddr[MAX_ADDR_LEN];
2244
2245                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2246                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2247                         read_unlock_bh(&neigh->lock);
2248                         goto nla_put_failure;
2249                 }
2250         }
2251
2252         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2253         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2254         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2255         ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
2256         read_unlock_bh(&neigh->lock);
2257
2258         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2259             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2260                 goto nla_put_failure;
2261
2262         nlmsg_end(skb, nlh);
2263         return 0;
2264
2265 nla_put_failure:
2266         nlmsg_cancel(skb, nlh);
2267         return -EMSGSIZE;
2268 }
2269
2270 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2271                             u32 pid, u32 seq, int type, unsigned int flags,
2272                             struct neigh_table *tbl)
2273 {
2274         struct nlmsghdr *nlh;
2275         struct ndmsg *ndm;
2276
2277         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2278         if (nlh == NULL)
2279                 return -EMSGSIZE;
2280
2281         ndm = nlmsg_data(nlh);
2282         ndm->ndm_family  = tbl->family;
2283         ndm->ndm_pad1    = 0;
2284         ndm->ndm_pad2    = 0;
2285         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2286         ndm->ndm_type    = RTN_UNICAST;
2287         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2288         ndm->ndm_state   = NUD_NONE;
2289
2290         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2291                 goto nla_put_failure;
2292
2293         nlmsg_end(skb, nlh);
2294         return 0;
2295
2296 nla_put_failure:
2297         nlmsg_cancel(skb, nlh);
2298         return -EMSGSIZE;
2299 }
2300
2301 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2302 {
2303         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2304         __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2305 }
2306
2307 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2308 {
2309         struct net_device *master;
2310
2311         if (!master_idx)
2312                 return false;
2313
2314         master = netdev_master_upper_dev_get(dev);
2315         if (!master || master->ifindex != master_idx)
2316                 return true;
2317
2318         return false;
2319 }
2320
2321 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2322 {
2323         if (filter_idx && dev->ifindex != filter_idx)
2324                 return true;
2325
2326         return false;
2327 }
2328
2329 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2330                             struct netlink_callback *cb)
2331 {
2332         struct net *net = sock_net(skb->sk);
2333         const struct nlmsghdr *nlh = cb->nlh;
2334         struct nlattr *tb[NDA_MAX + 1];
2335         struct neighbour *n;
2336         int rc, h, s_h = cb->args[1];
2337         int idx, s_idx = idx = cb->args[2];
2338         struct neigh_hash_table *nht;
2339         int filter_master_idx = 0, filter_idx = 0;
2340         unsigned int flags = NLM_F_MULTI;
2341         int err;
2342
2343         err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
2344         if (!err) {
2345                 if (tb[NDA_IFINDEX]) {
2346                         if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
2347                                 return -EINVAL;
2348                         filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
2349                 }
2350                 if (tb[NDA_MASTER]) {
2351                         if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
2352                                 return -EINVAL;
2353                         filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
2354                 }
2355                 if (filter_idx || filter_master_idx)
2356                         flags |= NLM_F_DUMP_FILTERED;
2357         }
2358
2359         rcu_read_lock_bh();
2360         nht = rcu_dereference_bh(tbl->nht);
2361
2362         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2363                 if (h > s_h)
2364                         s_idx = 0;
2365                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2366                      n != NULL;
2367                      n = rcu_dereference_bh(n->next)) {
2368                         if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2369                                 goto next;
2370                         if (neigh_ifindex_filtered(n->dev, filter_idx) ||
2371                             neigh_master_filtered(n->dev, filter_master_idx))
2372                                 goto next;
2373                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2374                                             cb->nlh->nlmsg_seq,
2375                                             RTM_NEWNEIGH,
2376                                             flags) < 0) {
2377                                 rc = -1;
2378                                 goto out;
2379                         }
2380 next:
2381                         idx++;
2382                 }
2383         }
2384         rc = skb->len;
2385 out:
2386         rcu_read_unlock_bh();
2387         cb->args[1] = h;
2388         cb->args[2] = idx;
2389         return rc;
2390 }
2391
2392 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2393                              struct netlink_callback *cb)
2394 {
2395         struct pneigh_entry *n;
2396         struct net *net = sock_net(skb->sk);
2397         int rc, h, s_h = cb->args[3];
2398         int idx, s_idx = idx = cb->args[4];
2399
2400         read_lock_bh(&tbl->lock);
2401
2402         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2403                 if (h > s_h)
2404                         s_idx = 0;
2405                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2406                         if (idx < s_idx || pneigh_net(n) != net)
2407                                 goto next;
2408                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2409                                             cb->nlh->nlmsg_seq,
2410                                             RTM_NEWNEIGH,
2411                                             NLM_F_MULTI, tbl) < 0) {
2412                                 read_unlock_bh(&tbl->lock);
2413                                 rc = -1;
2414                                 goto out;
2415                         }
2416                 next:
2417                         idx++;
2418                 }
2419         }
2420
2421         read_unlock_bh(&tbl->lock);
2422         rc = skb->len;
2423 out:
2424         cb->args[3] = h;
2425         cb->args[4] = idx;
2426         return rc;
2427
2428 }
2429
2430 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2431 {
2432         struct neigh_table *tbl;
2433         int t, family, s_t;
2434         int proxy = 0;
2435         int err;
2436
2437         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2438
2439         /* check for full ndmsg structure presence, family member is
2440          * the same for both structures
2441          */
2442         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2443             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2444                 proxy = 1;
2445
2446         s_t = cb->args[0];
2447
2448         for (t = 0; t < NEIGH_NR_TABLES; t++) {
2449                 tbl = neigh_tables[t];
2450
2451                 if (!tbl)
2452                         continue;
2453                 if (t < s_t || (family && tbl->family != family))
2454                         continue;
2455                 if (t > s_t)
2456                         memset(&cb->args[1], 0, sizeof(cb->args) -
2457                                                 sizeof(cb->args[0]));
2458                 if (proxy)
2459                         err = pneigh_dump_table(tbl, skb, cb);
2460                 else
2461                         err = neigh_dump_table(tbl, skb, cb);
2462                 if (err < 0)
2463                         break;
2464         }
2465
2466         cb->args[0] = t;
2467         return skb->len;
2468 }
2469
2470 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2471 {
2472         int chain;
2473         struct neigh_hash_table *nht;
2474
2475         rcu_read_lock_bh();
2476         nht = rcu_dereference_bh(tbl->nht);
2477
2478         read_lock(&tbl->lock); /* avoid resizes */
2479         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2480                 struct neighbour *n;
2481
2482                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2483                      n != NULL;
2484                      n = rcu_dereference_bh(n->next))
2485                         cb(n, cookie);
2486         }
2487         read_unlock(&tbl->lock);
2488         rcu_read_unlock_bh();
2489 }
2490 EXPORT_SYMBOL(neigh_for_each);
2491
2492 /* The tbl->lock must be held as a writer and BH disabled. */
2493 void __neigh_for_each_release(struct neigh_table *tbl,
2494                               int (*cb)(struct neighbour *))
2495 {
2496         int chain;
2497         struct neigh_hash_table *nht;
2498
2499         nht = rcu_dereference_protected(tbl->nht,
2500                                         lockdep_is_held(&tbl->lock));
2501         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2502                 struct neighbour *n;
2503                 struct neighbour __rcu **np;
2504
2505                 np = &nht->hash_buckets[chain];
2506                 while ((n = rcu_dereference_protected(*np,
2507                                         lockdep_is_held(&tbl->lock))) != NULL) {
2508                         int release;
2509
2510                         write_lock(&n->lock);
2511                         release = cb(n);
2512                         if (release) {
2513                                 rcu_assign_pointer(*np,
2514                                         rcu_dereference_protected(n->next,
2515                                                 lockdep_is_held(&tbl->lock)));
2516                                 n->dead = 1;
2517                         } else
2518                                 np = &n->next;
2519                         write_unlock(&n->lock);
2520                         if (release)
2521                                 neigh_cleanup_and_release(n);
2522                 }
2523         }
2524 }
2525 EXPORT_SYMBOL(__neigh_for_each_release);
2526
2527 int neigh_xmit(int index, struct net_device *dev,
2528                const void *addr, struct sk_buff *skb)
2529 {
2530         int err = -EAFNOSUPPORT;
2531         if (likely(index < NEIGH_NR_TABLES)) {
2532                 struct neigh_table *tbl;
2533                 struct neighbour *neigh;
2534
2535                 tbl = neigh_tables[index];
2536                 if (!tbl)
2537                         goto out;
2538                 rcu_read_lock_bh();
2539                 if (index == NEIGH_ARP_TABLE) {
2540                         u32 key = *((u32 *)addr);
2541
2542                         neigh = __ipv4_neigh_lookup_noref(dev, key);
2543                 } else {
2544                         neigh = __neigh_lookup_noref(tbl, addr, dev);
2545                 }
2546                 if (!neigh)
2547                         neigh = __neigh_create(tbl, addr, dev, false);
2548                 err = PTR_ERR(neigh);
2549                 if (IS_ERR(neigh)) {
2550                         rcu_read_unlock_bh();
2551                         goto out_kfree_skb;
2552                 }
2553                 err = neigh->output(neigh, skb);
2554                 rcu_read_unlock_bh();
2555         }
2556         else if (index == NEIGH_LINK_TABLE) {
2557                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2558                                       addr, NULL, skb->len);
2559                 if (err < 0)
2560                         goto out_kfree_skb;
2561                 err = dev_queue_xmit(skb);
2562         }
2563 out:
2564         return err;
2565 out_kfree_skb:
2566         kfree_skb(skb);
2567         goto out;
2568 }
2569 EXPORT_SYMBOL(neigh_xmit);
2570
2571 #ifdef CONFIG_PROC_FS
2572
2573 static struct neighbour *neigh_get_first(struct seq_file *seq)
2574 {
2575         struct neigh_seq_state *state = seq->private;
2576         struct net *net = seq_file_net(seq);
2577         struct neigh_hash_table *nht = state->nht;
2578         struct neighbour *n = NULL;
2579         int bucket = state->bucket;
2580
2581         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2582         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2583                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2584
2585                 while (n) {
2586                         if (!net_eq(dev_net(n->dev), net))
2587                                 goto next;
2588                         if (state->neigh_sub_iter) {
2589                                 loff_t fakep = 0;
2590                                 void *v;
2591
2592                                 v = state->neigh_sub_iter(state, n, &fakep);
2593                                 if (!v)
2594                                         goto next;
2595                         }
2596                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2597                                 break;
2598                         if (n->nud_state & ~NUD_NOARP)
2599                                 break;
2600 next:
2601                         n = rcu_dereference_bh(n->next);
2602                 }
2603
2604                 if (n)
2605                         break;
2606         }
2607         state->bucket = bucket;
2608
2609         return n;
2610 }
2611
2612 static struct neighbour *neigh_get_next(struct seq_file *seq,
2613                                         struct neighbour *n,
2614                                         loff_t *pos)
2615 {
2616         struct neigh_seq_state *state = seq->private;
2617         struct net *net = seq_file_net(seq);
2618         struct neigh_hash_table *nht = state->nht;
2619
2620         if (state->neigh_sub_iter) {
2621                 void *v = state->neigh_sub_iter(state, n, pos);
2622                 if (v)
2623                         return n;
2624         }
2625         n = rcu_dereference_bh(n->next);
2626
2627         while (1) {
2628                 while (n) {
2629                         if (!net_eq(dev_net(n->dev), net))
2630                                 goto next;
2631                         if (state->neigh_sub_iter) {
2632                                 void *v = state->neigh_sub_iter(state, n, pos);
2633                                 if (v)
2634                                         return n;
2635                                 goto next;
2636                         }
2637                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2638                                 break;
2639
2640                         if (n->nud_state & ~NUD_NOARP)
2641                                 break;
2642 next:
2643                         n = rcu_dereference_bh(n->next);
2644                 }
2645
2646                 if (n)
2647                         break;
2648
2649                 if (++state->bucket >= (1 << nht->hash_shift))
2650                         break;
2651
2652                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2653         }
2654
2655         if (n && pos)
2656                 --(*pos);
2657         return n;
2658 }
2659
2660 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2661 {
2662         struct neighbour *n = neigh_get_first(seq);
2663
2664         if (n) {
2665                 --(*pos);
2666                 while (*pos) {
2667                         n = neigh_get_next(seq, n, pos);
2668                         if (!n)
2669                                 break;
2670                 }
2671         }
2672         return *pos ? NULL : n;
2673 }
2674
2675 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2676 {
2677         struct neigh_seq_state *state = seq->private;
2678         struct net *net = seq_file_net(seq);
2679         struct neigh_table *tbl = state->tbl;
2680         struct pneigh_entry *pn = NULL;
2681         int bucket = state->bucket;
2682
2683         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2684         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2685                 pn = tbl->phash_buckets[bucket];
2686                 while (pn && !net_eq(pneigh_net(pn), net))
2687                         pn = pn->next;
2688                 if (pn)
2689                         break;
2690         }
2691         state->bucket = bucket;
2692
2693         return pn;
2694 }
2695
2696 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2697                                             struct pneigh_entry *pn,
2698                                             loff_t *pos)
2699 {
2700         struct neigh_seq_state *state = seq->private;
2701         struct net *net = seq_file_net(seq);
2702         struct neigh_table *tbl = state->tbl;
2703
2704         do {
2705                 pn = pn->next;
2706         } while (pn && !net_eq(pneigh_net(pn), net));
2707
2708         while (!pn) {
2709                 if (++state->bucket > PNEIGH_HASHMASK)
2710                         break;
2711                 pn = tbl->phash_buckets[state->bucket];
2712                 while (pn && !net_eq(pneigh_net(pn), net))
2713                         pn = pn->next;
2714                 if (pn)
2715                         break;
2716         }
2717
2718         if (pn && pos)
2719                 --(*pos);
2720
2721         return pn;
2722 }
2723
2724 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2725 {
2726         struct pneigh_entry *pn = pneigh_get_first(seq);
2727
2728         if (pn) {
2729                 --(*pos);
2730                 while (*pos) {
2731                         pn = pneigh_get_next(seq, pn, pos);
2732                         if (!pn)
2733                                 break;
2734                 }
2735         }
2736         return *pos ? NULL : pn;
2737 }
2738
2739 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2740 {
2741         struct neigh_seq_state *state = seq->private;
2742         void *rc;
2743         loff_t idxpos = *pos;
2744
2745         rc = neigh_get_idx(seq, &idxpos);
2746         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2747                 rc = pneigh_get_idx(seq, &idxpos);
2748
2749         return rc;
2750 }
2751
2752 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2753         __acquires(tbl->lock)
2754         __acquires(rcu_bh)
2755 {
2756         struct neigh_seq_state *state = seq->private;
2757
2758         state->tbl = tbl;
2759         state->bucket = 0;
2760         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2761
2762         rcu_read_lock_bh();
2763         state->nht = rcu_dereference_bh(tbl->nht);
2764         read_lock(&tbl->lock);
2765
2766         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2767 }
2768 EXPORT_SYMBOL(neigh_seq_start);
2769
2770 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2771 {
2772         struct neigh_seq_state *state;
2773         void *rc;
2774
2775         if (v == SEQ_START_TOKEN) {
2776                 rc = neigh_get_first(seq);
2777                 goto out;
2778         }
2779
2780         state = seq->private;
2781         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2782                 rc = neigh_get_next(seq, v, NULL);
2783                 if (rc)
2784                         goto out;
2785                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2786                         rc = pneigh_get_first(seq);
2787         } else {
2788                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2789                 rc = pneigh_get_next(seq, v, NULL);
2790         }
2791 out:
2792         ++(*pos);
2793         return rc;
2794 }
2795 EXPORT_SYMBOL(neigh_seq_next);
2796
2797 void neigh_seq_stop(struct seq_file *seq, void *v)
2798         __releases(tbl->lock)
2799         __releases(rcu_bh)
2800 {
2801         struct neigh_seq_state *state = seq->private;
2802         struct neigh_table *tbl = state->tbl;
2803
2804         read_unlock(&tbl->lock);
2805         rcu_read_unlock_bh();
2806 }
2807 EXPORT_SYMBOL(neigh_seq_stop);
2808
2809 /* statistics via seq_file */
2810
2811 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2812 {
2813         struct neigh_table *tbl = seq->private;
2814         int cpu;
2815
2816         if (*pos == 0)
2817                 return SEQ_START_TOKEN;
2818
2819         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2820                 if (!cpu_possible(cpu))
2821                         continue;
2822                 *pos = cpu+1;
2823                 return per_cpu_ptr(tbl->stats, cpu);
2824         }
2825         return NULL;
2826 }
2827
2828 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2829 {
2830         struct neigh_table *tbl = seq->private;
2831         int cpu;
2832
2833         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2834                 if (!cpu_possible(cpu))
2835                         continue;
2836                 *pos = cpu+1;
2837                 return per_cpu_ptr(tbl->stats, cpu);
2838         }
2839         (*pos)++;
2840         return NULL;
2841 }
2842
/* Nothing was acquired by the start/next callbacks, so nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2847
2848 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2849 {
2850         struct neigh_table *tbl = seq->private;
2851         struct neigh_statistics *st = v;
2852
2853         if (v == SEQ_START_TOKEN) {
2854                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2855                 return 0;
2856         }
2857
2858         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2859                         "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2860                    atomic_read(&tbl->entries),
2861
2862                    st->allocs,
2863                    st->destroys,
2864                    st->hash_grows,
2865
2866                    st->lookups,
2867                    st->hits,
2868
2869                    st->res_failed,
2870
2871                    st->rcv_probes_mcast,
2872                    st->rcv_probes_ucast,
2873
2874                    st->periodic_gc_runs,
2875                    st->forced_gc_runs,
2876                    st->unres_discards,
2877                    st->table_fulls
2878                    );
2879
2880         return 0;
2881 }
2882
2883 static const struct seq_operations neigh_stat_seq_ops = {
2884         .start  = neigh_stat_seq_start,
2885         .next   = neigh_stat_seq_next,
2886         .stop   = neigh_stat_seq_stop,
2887         .show   = neigh_stat_seq_show,
2888 };
2889
2890 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2891 {
2892         int ret = seq_open(file, &neigh_stat_seq_ops);
2893
2894         if (!ret) {
2895                 struct seq_file *sf = file->private_data;
2896                 sf->private = PDE_DATA(inode);
2897         }
2898         return ret;
2899 };
2900
2901 static const struct file_operations neigh_stat_seq_fops = {
2902         .owner   = THIS_MODULE,
2903         .open    = neigh_stat_seq_open,
2904         .read    = seq_read,
2905         .llseek  = seq_lseek,
2906         .release = seq_release,
2907 };
2908
2909 #endif /* CONFIG_PROC_FS */
2910
2911 static inline size_t neigh_nlmsg_size(void)
2912 {
2913         return NLMSG_ALIGN(sizeof(struct ndmsg))
2914                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2915                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2916                + nla_total_size(sizeof(struct nda_cacheinfo))
2917                + nla_total_size(4); /* NDA_PROBES */
2918 }
2919
2920 static void __neigh_notify(struct neighbour *n, int type, int flags,
2921                            u32 pid)
2922 {
2923         struct net *net = dev_net(n->dev);
2924         struct sk_buff *skb;
2925         int err = -ENOBUFS;
2926
2927         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2928         if (skb == NULL)
2929                 goto errout;
2930
2931         err = neigh_fill_info(skb, n, pid, 0, type, flags);
2932         if (err < 0) {
2933                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2934                 WARN_ON(err == -EMSGSIZE);
2935                 kfree_skb(skb);
2936                 goto errout;
2937         }
2938         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2939         return;
2940 errout:
2941         if (err < 0)
2942                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2943 }
2944
2945 void neigh_app_ns(struct neighbour *n)
2946 {
2947         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
2948 }
2949 EXPORT_SYMBOL(neigh_app_ns);
2950
2951 #ifdef CONFIG_SYSCTL
2952 static int zero;
2953 static int int_max = INT_MAX;
2954 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2955
2956 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2957                            void __user *buffer, size_t *lenp, loff_t *ppos)
2958 {
2959         int size, ret;
2960         struct ctl_table tmp = *ctl;
2961
2962         tmp.extra1 = &zero;
2963         tmp.extra2 = &unres_qlen_max;
2964         tmp.data = &size;
2965
2966         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2967         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2968
2969         if (write && !ret)
2970                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2971         return ret;
2972 }
2973
2974 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2975                                                    int family)
2976 {
2977         switch (family) {
2978         case AF_INET:
2979                 return __in_dev_arp_parms_get_rcu(dev);
2980         case AF_INET6:
2981                 return __in6_dev_nd_parms_get_rcu(dev);
2982         }
2983         return NULL;
2984 }
2985
2986 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2987                                   int index)
2988 {
2989         struct net_device *dev;
2990         int family = neigh_parms_family(p);
2991
2992         rcu_read_lock();
2993         for_each_netdev_rcu(net, dev) {
2994                 struct neigh_parms *dst_p =
2995                                 neigh_get_dev_parms_rcu(dev, family);
2996
2997                 if (dst_p && !test_bit(index, dst_p->data_state))
2998                         dst_p->data[index] = p->data[index];
2999         }
3000         rcu_read_unlock();
3001 }
3002
3003 static void neigh_proc_update(struct ctl_table *ctl, int write)
3004 {
3005         struct net_device *dev = ctl->extra1;
3006         struct neigh_parms *p = ctl->extra2;
3007         struct net *net = neigh_parms_net(p);
3008         int index = (int *) ctl->data - p->data;
3009
3010         if (!write)
3011                 return;
3012
3013         set_bit(index, p->data_state);
3014         if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3015                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3016         if (!dev) /* NULL dev means this is default value */
3017                 neigh_copy_dflt_parms(net, p, index);
3018 }
3019
3020 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3021                                            void __user *buffer,
3022                                            size_t *lenp, loff_t *ppos)
3023 {
3024         struct ctl_table tmp = *ctl;
3025         int ret;
3026
3027         tmp.extra1 = &zero;
3028         tmp.extra2 = &int_max;
3029
3030         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3031         neigh_proc_update(ctl, write);
3032         return ret;
3033 }
3034
3035 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3036                         void __user *buffer, size_t *lenp, loff_t *ppos)
3037 {
3038         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3039
3040         neigh_proc_update(ctl, write);
3041         return ret;
3042 }
3043 EXPORT_SYMBOL(neigh_proc_dointvec);
3044
3045 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3046                                 void __user *buffer,
3047                                 size_t *lenp, loff_t *ppos)
3048 {
3049         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3050
3051         neigh_proc_update(ctl, write);
3052         return ret;
3053 }
3054 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3055
3056 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3057                                               void __user *buffer,
3058                                               size_t *lenp, loff_t *ppos)
3059 {
3060         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3061
3062         neigh_proc_update(ctl, write);
3063         return ret;
3064 }
3065
3066 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3067                                    void __user *buffer,
3068                                    size_t *lenp, loff_t *ppos)
3069 {
3070         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3071
3072         neigh_proc_update(ctl, write);
3073         return ret;
3074 }
3075 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3076
3077 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3078                                           void __user *buffer,
3079                                           size_t *lenp, loff_t *ppos)
3080 {
3081         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3082
3083         neigh_proc_update(ctl, write);
3084         return ret;
3085 }
3086
3087 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3088                                           void __user *buffer,
3089                                           size_t *lenp, loff_t *ppos)
3090 {
3091         struct neigh_parms *p = ctl->extra2;
3092         int ret;
3093
3094         if (strcmp(ctl->procname, "base_reachable_time") == 0)
3095                 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3096         else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3097                 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3098         else
3099                 ret = -1;
3100
3101         if (write && ret == 0) {
3102                 /* update reachable_time as well, otherwise, the change will
3103                  * only be effective after the next time neigh_periodic_work
3104                  * decides to recompute it
3105                  */
3106                 p->reachable_time =
3107                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3108         }
3109         return ret;
3110 }
3111
/*
 * Address of data[idx] inside a struct neigh_parms located at address 0,
 * i.e. the offset of that slot.  neigh_sysctl_register() adds the real
 * parameter block address to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(idx)				\
	(&((struct neigh_parms *) 0)->data[idx])

/*
 * Template slot NEIGH_VAR_<var>, backed by the storage of
 * NEIGH_VAR_<data_var>, named pname, with permissions perm and
 * handler fn.
 */
#define NEIGH_SYSCTL_ENTRY(var, data_var, pname, perm, fn)	\
	[NEIGH_VAR_ ## var] = {					\
		.procname	= pname,			\
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_var), \
		.maxlen		= sizeof(int),			\
		.mode		= perm,				\
		.proc_handler	= fn,				\
	}

/* Entries that own their storage slot, one macro per value handler. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(var, pname)		\
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(var, pname)			\
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(var, pname)		\
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(var, pname)		\
	NEIGH_SYSCTL_ENTRY(var, var, pname, 0644, neigh_proc_dointvec_ms_jiffies)

/* Entries that present another variable's storage under a second name. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(var, data_var, pname) \
	NEIGH_SYSCTL_ENTRY(var, data_var, pname, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(var, data_var, pname) \
	NEIGH_SYSCTL_ENTRY(var, data_var, pname, 0644, neigh_proc_dointvec_unres_qlen)

3141
3142 static struct neigh_sysctl_table {
3143         struct ctl_table_header *sysctl_header;
3144         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3145 } neigh_sysctl_template __read_mostly = {
3146         .neigh_vars = {
3147                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3148                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3149                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3150                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3151                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3152                 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3153                 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3154                 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3155                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3156                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3157                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3158                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3159                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3160                 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3161                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3162                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3163                 [NEIGH_VAR_GC_INTERVAL] = {
3164                         .procname       = "gc_interval",
3165                         .maxlen         = sizeof(int),
3166                         .mode           = 0644,
3167                         .proc_handler   = proc_dointvec_jiffies,
3168                 },
3169                 [NEIGH_VAR_GC_THRESH1] = {
3170                         .procname       = "gc_thresh1",
3171                         .maxlen         = sizeof(int),
3172                         .mode           = 0644,
3173                         .extra1         = &zero,
3174                         .extra2         = &int_max,
3175                         .proc_handler   = proc_dointvec_minmax,
3176                 },
3177                 [NEIGH_VAR_GC_THRESH2] = {
3178                         .procname       = "gc_thresh2",
3179                         .maxlen         = sizeof(int),
3180                         .mode           = 0644,
3181                         .extra1         = &zero,
3182                         .extra2         = &int_max,
3183                         .proc_handler   = proc_dointvec_minmax,
3184                 },
3185                 [NEIGH_VAR_GC_THRESH3] = {
3186                         .procname       = "gc_thresh3",
3187                         .maxlen         = sizeof(int),
3188                         .mode           = 0644,
3189                         .extra1         = &zero,
3190                         .extra2         = &int_max,
3191                         .proc_handler   = proc_dointvec_minmax,
3192                 },
3193                 {},
3194         },
3195 };
3196
3197 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3198                           proc_handler *handler)
3199 {
3200         int i;
3201         struct neigh_sysctl_table *t;
3202         const char *dev_name_source;
3203         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3204         char *p_name;
3205
3206         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3207         if (!t)
3208                 goto err;
3209
3210         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3211                 t->neigh_vars[i].data += (long) p;
3212                 t->neigh_vars[i].extra1 = dev;
3213                 t->neigh_vars[i].extra2 = p;
3214         }
3215
3216         if (dev) {
3217                 dev_name_source = dev->name;
3218                 /* Terminate the table early */
3219                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3220                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3221         } else {
3222                 struct neigh_table *tbl = p->tbl;
3223                 dev_name_source = "default";
3224                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3225                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3226                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3227                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3228         }
3229
3230         if (handler) {
3231                 /* RetransTime */
3232                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3233                 /* ReachableTime */
3234                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3235                 /* RetransTime (in milliseconds)*/
3236                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3237                 /* ReachableTime (in milliseconds) */
3238                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3239         } else {
3240                 /* Those handlers will update p->reachable_time after
3241                  * base_reachable_time(_ms) is set to ensure the new timer starts being
3242                  * applied after the next neighbour update instead of waiting for
3243                  * neigh_periodic_work to update its value (can be multiple minutes)
3244                  * So any handler that replaces them should do this as well
3245                  */
3246                 /* ReachableTime */
3247                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3248                         neigh_proc_base_reachable_time;
3249                 /* ReachableTime (in milliseconds) */
3250                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3251                         neigh_proc_base_reachable_time;
3252         }
3253
3254         /* Don't export sysctls to unprivileged users */
3255         if (neigh_parms_net(p)->user_ns != &init_user_ns)
3256                 t->neigh_vars[0].procname = NULL;
3257
3258         switch (neigh_parms_family(p)) {
3259         case AF_INET:
3260               p_name = "ipv4";
3261               break;
3262         case AF_INET6:
3263               p_name = "ipv6";
3264               break;
3265         default:
3266               BUG();
3267         }
3268
3269         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3270                 p_name, dev_name_source);
3271         t->sysctl_header =
3272                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3273         if (!t->sysctl_header)
3274                 goto free;
3275
3276         p->sysctl_table = t;
3277         return 0;
3278
3279 free:
3280         kfree(t);
3281 err:
3282         return -ENOBUFS;
3283 }
3284 EXPORT_SYMBOL(neigh_sysctl_register);
3285
3286 void neigh_sysctl_unregister(struct neigh_parms *p)
3287 {
3288         if (p->sysctl_table) {
3289                 struct neigh_sysctl_table *t = p->sysctl_table;
3290                 p->sysctl_table = NULL;
3291                 unregister_net_sysctl_table(t->sysctl_header);
3292                 kfree(t);
3293         }
3294 }
3295 EXPORT_SYMBOL(neigh_sysctl_unregister);
3296
3297 #endif  /* CONFIG_SYSCTL */
3298
3299 static int __init neigh_init(void)
3300 {
3301         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3302         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3303         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3304
3305         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3306                       0);
3307         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3308
3309         return 0;
3310 }
3311
3312 subsys_initcall(neigh_init);
3313