GNU Linux-libre 4.14.332-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
/* Debug plumbing: neigh_dbg(level, ...) compiles to pr_debug() only when
 * 'level' does not exceed the compile-time NEIGH_DEBUG verbosity.
 * DEBUG is defined so pr_debug() is always compiled in for this file.
 */
46 #define DEBUG
47 #define NEIGH_DEBUG 1
48 #define neigh_dbg(level, fmt, ...)              \
49 do {                                            \
50         if (level <= NEIGH_DEBUG)               \
51                 pr_debug(fmt, ##__VA_ARGS__);   \
52 } while (0)
53
/* Proxy-neighbour hash table has PNEIGH_HASHMASK + 1 (16) buckets. */
54 #define PNEIGH_HASHMASK         0xF
55
/* Forward declarations for helpers defined later in this file. */
56 static void neigh_timer_handler(unsigned long arg);
57 static void __neigh_notify(struct neighbour *n, int type, int flags,
58                            u32 pid);
59 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
/* Note: takes tbl->lock held for writing and releases it (see definition). */
60 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
61                                     struct net_device *dev);
62
63 #ifdef CONFIG_PROC_FS
64 static const struct file_operations neigh_stat_seq_fops;
65 #endif
66
67 /*
68    Neighbour hash table buckets are protected with rwlock tbl->lock.
69
70    - All the scans/updates to hash buckets MUST be made under this lock.
71    - NOTHING clever should be made under this lock: no callbacks
72      to protocol backends, no attempts to send something to network.
73      It will result in deadlocks, if backend/driver wants to use neighbour
74      cache.
75    - If the entry requires some non-trivial actions, increase
76      its reference count and release table lock.
77
78    Neighbour entries are protected:
79    - with reference count.
80    - with rwlock neigh->lock
81
82    Reference count prevents destruction.
83
84    neigh->lock mainly serializes ll address data and its validity state.
85    However, the same lock is used to protect another entry fields:
86     - timer
87     - resolution queue
88
89    Again, nothing clever shall be made under neigh->lock,
90    the most complicated procedure, which we allow is dev->hard_header.
91    It is supposed, that dev->hard_header is simplistic and does
92    not make callbacks to neighbour tables.
93  */
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
/* Final teardown for an entry that has been unlinked from its hash chain:
 * run the optional per-parms cleanup hook, broadcast RTM_DELNEIGH over
 * netlink plus a netevent, then drop the table's reference (which may
 * free the entry via neigh_destroy()).
 */
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
107         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
108         neigh_release(neigh);
109 }
110
111 /*
112  * It is random distribution in the interval (1/2)*base...(3/2)*base.
113  * It corresponds to default IPv6 settings and is not overridable,
114  * because it is really reasonable choice.
115  */
116
/* Pick a randomized reachability timeout, uniformly distributed over
 * (base/2, 3*base/2) as the comment above describes; a zero base yields
 * zero (randomization disabled).
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
/* Try to unlink @n from its hash chain (via the chain link slot @np).
 * Caller holds tbl->lock for writing.  Succeeds only when the table holds
 * the sole reference (refcnt == 1) and the entry's NUD state is not in the
 * protected @state mask; on success marks the entry dead and releases it.
 * Returns true if the entry was removed.
 */
124 static bool neigh_del(struct neighbour *n, __u8 state,
125                       struct neighbour __rcu **np, struct neigh_table *tbl)
126 {
127         bool retval = false;
128
129         write_lock(&n->lock);
130         if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
131                 struct neighbour *neigh;
132
133                 neigh = rcu_dereference_protected(n->next,
134                                                   lockdep_is_held(&tbl->lock));
135                 rcu_assign_pointer(*np, neigh);
136                 n->dead = 1;
137                 retval = true;
138         }
139         write_unlock(&n->lock);
        /* Must drop n->lock before the release: cleanup notifies and may free. */
140         if (retval)
141                 neigh_cleanup_and_release(n);
142         return retval;
143 }
144
/* Locate @ndel in its hash bucket and remove it with neigh_del() (state
 * mask 0, so only the refcount guards removal).  Caller holds tbl->lock
 * for writing.  Returns true if the entry was found and removed.
 */
145 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
146 {
147         struct neigh_hash_table *nht;
148         void *pkey = ndel->primary_key;
149         u32 hash_val;
150         struct neighbour *n;
151         struct neighbour __rcu **np;
152
153         nht = rcu_dereference_protected(tbl->nht,
154                                         lockdep_is_held(&tbl->lock));
155         hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
        /* Top hash_shift bits of the 32-bit hash select the bucket. */
156         hash_val = hash_val >> (32 - nht->hash_shift);
157
158         np = &nht->hash_buckets[hash_val];
159         while ((n = rcu_dereference_protected(*np,
160                                               lockdep_is_held(&tbl->lock)))) {
161                 if (n == ndel)
162                         return neigh_del(n, 0, np, tbl);
163                 np = &n->next;
164         }
165         return false;
166 }
167
/* Synchronous garbage collection, run from neigh_alloc() when the table
 * is over its gc thresholds.  Walks every bucket and discards entries that
 * are unreferenced and not NUD_PERMANENT.  Returns 1 if anything was
 * freed, 0 otherwise.  Takes tbl->lock itself (write, BH-disabled).
 */
168 static int neigh_forced_gc(struct neigh_table *tbl)
169 {
170         int shrunk = 0;
171         int i;
172         struct neigh_hash_table *nht;
173
174         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
175
176         write_lock_bh(&tbl->lock);
177         nht = rcu_dereference_protected(tbl->nht,
178                                         lockdep_is_held(&tbl->lock));
179         for (i = 0; i < (1 << nht->hash_shift); i++) {
180                 struct neighbour *n;
181                 struct neighbour __rcu **np;
182
183                 np = &nht->hash_buckets[i];
184                 while ((n = rcu_dereference_protected(*np,
185                                         lockdep_is_held(&tbl->lock))) != NULL) {
186                         /* Neighbour record may be discarded if:
187                          * - nobody refers to it.
188                          * - it is not permanent
189                          */
190                         if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
191                                 shrunk = 1;
                                /* *np was advanced past n by neigh_del(). */
192                                 continue;
193                         }
194                         np = &n->next;
195                 }
196         }
197
        /* Rate-limits the gc_thresh2 path in neigh_alloc() (5 s back-off). */
198         tbl->last_flush = jiffies;
199
200         write_unlock_bh(&tbl->lock);
201
202         return shrunk;
203 }
204
205 static void neigh_add_timer(struct neighbour *n, unsigned long when)
206 {
207         neigh_hold(n);
208         if (unlikely(mod_timer(&n->timer, when))) {
209                 printk("NEIGH: BUG, double timer add, state is %x\n",
210                        n->nud_state);
211                 dump_stack();
212         }
213 }
214
215 static int neigh_del_timer(struct neighbour *n)
216 {
217         if ((n->nud_state & NUD_IN_TIMER) &&
218             del_timer(&n->timer)) {
219                 neigh_release(n);
220                 return 1;
221         }
222         return 0;
223 }
224
/* Drop queued proxy skbs belonging to @net (all of them when @net is
 * NULL).  Matching skbs are first moved to a private list under the queue
 * lock (irq-safe), then released outside the lock so dev_put()/kfree_skb()
 * never run with the queue spinlock held.
 */
225 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
226 {
227         struct sk_buff_head tmp;
228         unsigned long flags;
229         struct sk_buff *skb;
230
231         skb_queue_head_init(&tmp);
232         spin_lock_irqsave(&list->lock, flags);
233         skb = skb_peek(list);
234         while (skb != NULL) {
                /* Grab the successor before unlinking invalidates skb's links. */
235                 struct sk_buff *skb_next = skb_peek_next(skb, list);
236                 if (net == NULL || net_eq(dev_net(skb->dev), net)) {
237                         __skb_unlink(skb, list);
238                         __skb_queue_tail(&tmp, skb);
239                 }
240                 skb = skb_next;
241         }
242         spin_unlock_irqrestore(&list->lock, flags);
243
244         while ((skb = __skb_dequeue(&tmp))) {
245                 dev_put(skb->dev);
246                 kfree_skb(skb);
247         }
248 }
249
/* Unlink every entry bound to @dev from the hash table (all entries when
 * @dev is NULL).  Caller holds tbl->lock for writing.  Entries still
 * referenced elsewhere cannot be freed yet, so they are neutralized in
 * place: queue purged, output blackholed, state demoted.
 */
250 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
251 {
252         int i;
253         struct neigh_hash_table *nht;
254
255         nht = rcu_dereference_protected(tbl->nht,
256                                         lockdep_is_held(&tbl->lock));
257
258         for (i = 0; i < (1 << nht->hash_shift); i++) {
259                 struct neighbour *n;
260                 struct neighbour __rcu **np = &nht->hash_buckets[i];
261
262                 while ((n = rcu_dereference_protected(*np,
263                                         lockdep_is_held(&tbl->lock))) != NULL) {
264                         if (dev && n->dev != dev) {
265                                 np = &n->next;
266                                 continue;
267                         }
                        /* Unlink n; np intentionally not advanced — it now
                         * points at n's successor for the next iteration. */
268                         rcu_assign_pointer(*np,
269                                    rcu_dereference_protected(n->next,
270                                                 lockdep_is_held(&tbl->lock)));
271                         write_lock(&n->lock);
272                         neigh_del_timer(n);
273                         n->dead = 1;
274
275                         if (refcount_read(&n->refcnt) != 1) {
276                                 /* The most unpleasant situation.
277                                    We must destroy neighbour entry,
278                                    but someone still uses it.
279
280                                    The destroy will be delayed until
281                                    the last user releases us, but
282                                    we must kill timers etc. and move
283                                    it to safe state.
284                                  */
285                                 __skb_queue_purge(&n->arp_queue);
286                                 n->arp_queue_len_bytes = 0;
287                                 n->output = neigh_blackhole;
288                                 if (n->nud_state & NUD_VALID)
289                                         n->nud_state = NUD_NOARP;
290                                 else
291                                         n->nud_state = NUD_NONE;
292                                 neigh_dbg(2, "neigh %p is stray\n", n);
293                         }
294                         write_unlock(&n->lock);
295                         neigh_cleanup_and_release(n);
296                 }
297         }
298 }
299
/* Public wrapper: flush all of @dev's entries after an address change,
 * taking the table lock (write, BH-disabled) around neigh_flush_dev().
 */
300 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
301 {
302         write_lock_bh(&tbl->lock);
303         neigh_flush_dev(tbl, dev);
304         write_unlock_bh(&tbl->lock);
305 }
306 EXPORT_SYMBOL(neigh_changeaddr);
307
/* Device-down path: flush @dev's neighbour entries and proxy entries, and
 * purge proxied skbs for @dev's netns.  Note the asymmetric locking:
 * pneigh_ifdown_and_unlock() consumes (releases) the tbl->lock taken here.
 * Always returns 0.
 */
308 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
309 {
310         write_lock_bh(&tbl->lock);
311         neigh_flush_dev(tbl, dev);
312         pneigh_ifdown_and_unlock(tbl, dev);
313         pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
314         if (skb_queue_empty_lockless(&tbl->proxy_queue))
315                 del_timer_sync(&tbl->proxy_timer);
316         return 0;
317 }
318 EXPORT_SYMBOL(neigh_ifdown);
319
/* Allocate and minimally initialize a neighbour entry for @tbl/@dev.
 * Runs forced GC first when the table is over gc_thresh2 (rate-limited to
 * once per 5 s) or gc_thresh3; fails (NULL) if the table stays full or the
 * atomic allocation fails.  tbl->entries is pre-incremented and rolled
 * back on failure.
 */
320 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
321 {
322         struct neighbour *n = NULL;
323         unsigned long now = jiffies;
324         int entries;
325
326         entries = atomic_inc_return(&tbl->entries) - 1;
327         if (entries >= tbl->gc_thresh3 ||
328             (entries >= tbl->gc_thresh2 &&
329              time_after(now, tbl->last_flush + 5 * HZ))) {
330                 if (!neigh_forced_gc(tbl) &&
331                     entries >= tbl->gc_thresh3) {
332                         net_info_ratelimited("%s: neighbor table overflow!\n",
333                                              tbl->id);
334                         NEIGH_CACHE_STAT_INC(tbl, table_fulls);
335                         goto out_entries;
336                 }
337         }
338
        /* Trailing space for driver-private data (dev->neigh_priv_len). */
339         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
340         if (!n)
341                 goto out_entries;
342
343         __skb_queue_head_init(&n->arp_queue);
344         rwlock_init(&n->lock);
345         seqlock_init(&n->ha_lock);
346         n->updated        = n->used = now;
347         n->nud_state      = NUD_NONE;
348         n->output         = neigh_blackhole;
349         seqlock_init(&n->hh.hh_lock);
350         n->parms          = neigh_parms_clone(&tbl->parms);
351         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
352
353         NEIGH_CACHE_STAT_INC(tbl, allocs);
354         n->tbl            = tbl;
355         refcount_set(&n->refcnt, 1);
        /* Born dead: stays dead until __neigh_create() links it into the hash. */
356         n->dead           = 1;
357 out:
358         return n;
359
360 out_entries:
361         atomic_dec(&tbl->entries);
362         goto out;
363 }
364
365 static void neigh_get_hash_rnd(u32 *x)
366 {
367         *x = get_random_u32() | 1;
368 }
369
/* Allocate a hash table with 2^shift zeroed buckets and fresh random
 * seeds.  Small bucket arrays come from kzalloc; larger ones straight
 * from the page allocator (registered with kmemleak manually, since the
 * page allocator is not tracked).  Returns NULL on allocation failure.
 */
370 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
371 {
372         size_t size = (1 << shift) * sizeof(struct neighbour *);
373         struct neigh_hash_table *ret;
374         struct neighbour __rcu **buckets;
375         int i;
376
377         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
378         if (!ret)
379                 return NULL;
380         if (size <= PAGE_SIZE) {
381                 buckets = kzalloc(size, GFP_ATOMIC);
382         } else {
383                 buckets = (struct neighbour __rcu **)
384                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
385                                            get_order(size));
386                 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
387         }
388         if (!buckets) {
389                 kfree(ret);
390                 return NULL;
391         }
392         ret->hash_buckets = buckets;
393         ret->hash_shift = shift;
394         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
395                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
396         return ret;
397 }
398
/* RCU callback freeing a retired hash table; mirrors the two allocation
 * paths of neigh_hash_alloc() (kzalloc vs. free pages + kmemleak).
 */
399 static void neigh_hash_free_rcu(struct rcu_head *head)
400 {
401         struct neigh_hash_table *nht = container_of(head,
402                                                     struct neigh_hash_table,
403                                                     rcu);
404         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
405         struct neighbour __rcu **buckets = nht->hash_buckets;
406
407         if (size <= PAGE_SIZE) {
408                 kfree(buckets);
409         } else {
410                 kmemleak_free(buckets);
411                 free_pages((unsigned long)buckets, get_order(size));
412         }
413         kfree(nht);
414 }
415
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry under tbl->lock (held by the caller for writing).  On allocation
 * failure the old table is kept and returned.  The old table is freed via
 * RCU so lockless readers still traversing it remain safe.
 */
416 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
417                                                 unsigned long new_shift)
418 {
419         unsigned int i, hash;
420         struct neigh_hash_table *new_nht, *old_nht;
421
422         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
423
424         old_nht = rcu_dereference_protected(tbl->nht,
425                                             lockdep_is_held(&tbl->lock));
426         new_nht = neigh_hash_alloc(new_shift);
427         if (!new_nht)
428                 return old_nht;
429
430         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
431                 struct neighbour *n, *next;
432
433                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
434                                                    lockdep_is_held(&tbl->lock));
435                      n != NULL;
436                      n = next) {
                        /* Bucket index must be recomputed: the new table has
                         * fresh hash_rnd seeds as well as a new shift. */
437                         hash = tbl->hash(n->primary_key, n->dev,
438                                          new_nht->hash_rnd);
439
440                         hash >>= (32 - new_nht->hash_shift);
441                         next = rcu_dereference_protected(n->next,
442                                                 lockdep_is_held(&tbl->lock));
443
                        /* Push n onto the head of its new chain. */
444                         rcu_assign_pointer(n->next,
445                                            rcu_dereference_protected(
446                                                 new_nht->hash_buckets[hash],
447                                                 lockdep_is_held(&tbl->lock)));
448                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
449                 }
450         }
451
452         rcu_assign_pointer(tbl->nht, new_nht);
453         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
454         return new_nht;
455 }
456
/* Look up the entry for (@pkey, @dev) under RCU-BH and return it with a
 * reference held, or NULL.  An entry whose refcount already hit zero
 * (concurrently being destroyed) is treated as absent.
 */
457 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
458                                struct net_device *dev)
459 {
460         struct neighbour *n;
461
462         NEIGH_CACHE_STAT_INC(tbl, lookups);
463
464         rcu_read_lock_bh();
465         n = __neigh_lookup_noref(tbl, pkey, dev);
466         if (n) {
467                 if (!refcount_inc_not_zero(&n->refcnt))
468                         n = NULL;
                /* Counted as a hit even if the refcount grab raced a free. */
469                 NEIGH_CACHE_STAT_INC(tbl, hits);
470         }
471
472         rcu_read_unlock_bh();
473         return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup);
476
/* Create (or find) the entry for (@pkey, @dev) and insert it into the
 * table.  Runs protocol (tbl->constructor), driver (ndo_neigh_construct)
 * and parms (neigh_setup) hooks before taking the table lock.  If another
 * CPU inserted the same key meanwhile, the existing entry wins and the new
 * one is released.  Returns the entry (extra reference when @want_ref) or
 * an ERR_PTR on failure.
 */
477 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
478                                  struct net_device *dev, bool want_ref)
479 {
480         u32 hash_val;
481         int key_len = tbl->key_len;
482         int error;
483         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
484         struct neigh_hash_table *nht;
485
486         if (!n) {
487                 rc = ERR_PTR(-ENOBUFS);
488                 goto out;
489         }
490
491         memcpy(n->primary_key, pkey, key_len);
492         n->dev = dev;
493         dev_hold(dev);
494
495         /* Protocol specific setup. */
496         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
497                 rc = ERR_PTR(error);
498                 goto out_neigh_release;
499         }
500
501         if (dev->netdev_ops->ndo_neigh_construct) {
502                 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
503                 if (error < 0) {
504                         rc = ERR_PTR(error);
505                         goto out_neigh_release;
506                 }
507         }
508
509         /* Device specific setup. */
510         if (n->parms->neigh_setup &&
511             (error = n->parms->neigh_setup(n)) < 0) {
512                 rc = ERR_PTR(error);
513                 goto out_neigh_release;
514         }
515
        /* Backdate 'confirmed' so the new entry is immediately suspect. */
516         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
517
518         write_lock_bh(&tbl->lock);
519         nht = rcu_dereference_protected(tbl->nht,
520                                         lockdep_is_held(&tbl->lock));
521
522         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
523                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
524
525         hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
526
        /* parms went away (device being torn down) — refuse the insert. */
527         if (n->parms->dead) {
528                 rc = ERR_PTR(-EINVAL);
529                 goto out_tbl_unlock;
530         }
531
        /* Re-check for a racing insert of the same key under the lock. */
532         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
533                                             lockdep_is_held(&tbl->lock));
534              n1 != NULL;
535              n1 = rcu_dereference_protected(n1->next,
536                         lockdep_is_held(&tbl->lock))) {
537                 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
538                         if (want_ref)
539                                 neigh_hold(n1);
540                         rc = n1;
541                         goto out_tbl_unlock;
542                 }
543         }
544
545         n->dead = 0;
546         if (want_ref)
547                 neigh_hold(n);
548         rcu_assign_pointer(n->next,
549                            rcu_dereference_protected(nht->hash_buckets[hash_val],
550                                                      lockdep_is_held(&tbl->lock)));
551         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
552         write_unlock_bh(&tbl->lock);
553         neigh_dbg(2, "neigh %p is created\n", n);
554         rc = n;
555 out:
556         return rc;
557 out_tbl_unlock:
558         write_unlock_bh(&tbl->lock);
559 out_neigh_release:
560         neigh_release(n);
561         goto out;
562 }
563 EXPORT_SYMBOL(__neigh_create);
564
565 static u32 pneigh_hash(const void *pkey, int key_len)
566 {
567         u32 hash_val = *(u32 *)(pkey + key_len - 4);
568         hash_val ^= (hash_val >> 16);
569         hash_val ^= hash_val >> 8;
570         hash_val ^= hash_val >> 4;
571         hash_val &= PNEIGH_HASHMASK;
572         return hash_val;
573 }
574
575 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
576                                               struct net *net,
577                                               const void *pkey,
578                                               int key_len,
579                                               struct net_device *dev)
580 {
581         while (n) {
582                 if (!memcmp(n->key, pkey, key_len) &&
583                     net_eq(pneigh_net(n), net) &&
584                     (n->dev == dev || !n->dev))
585                         return n;
586                 n = n->next;
587         }
588         return NULL;
589 }
590
/* Lockless proxy lookup helper: hash the key and scan one chain.
 * NOTE(review): no tbl->lock taken here — callers are expected to provide
 * the needed synchronization themselves.
 */
591 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
592                 struct net *net, const void *pkey, struct net_device *dev)
593 {
594         int key_len = tbl->key_len;
595         u32 hash_val = pneigh_hash(pkey, key_len);
596
597         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
598                                  net, pkey, key_len, dev);
599 }
600 EXPORT_SYMBOL_GPL(__pneigh_lookup);
601
/* Look up a proxy entry, optionally creating it when @creat is set.
 * Creation sleeps (GFP_KERNEL) and must run under RTNL; the RTNL also
 * serializes concurrent creators, which is why the unlocked re-insert
 * below cannot race another create of the same key.
 */
602 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
603                                     struct net *net, const void *pkey,
604                                     struct net_device *dev, int creat)
605 {
606         struct pneigh_entry *n;
607         int key_len = tbl->key_len;
608         u32 hash_val = pneigh_hash(pkey, key_len);
609
610         read_lock_bh(&tbl->lock);
611         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
612                               net, pkey, key_len, dev);
613         read_unlock_bh(&tbl->lock);
614
615         if (n || !creat)
616                 goto out;
617
618         ASSERT_RTNL();
619
620         n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
621         if (!n)
622                 goto out;
623
624         write_pnet(&n->net, net);
625         memcpy(n->key, pkey, key_len);
626         n->dev = dev;
627         if (dev)
628                 dev_hold(dev);
629
630         if (tbl->pconstructor && tbl->pconstructor(n)) {
631                 if (dev)
632                         dev_put(dev);
633                 kfree(n);
634                 n = NULL;
635                 goto out;
636         }
637
        /* Head insertion into the bucket chain. */
638         write_lock_bh(&tbl->lock);
639         n->next = tbl->phash_buckets[hash_val];
640         tbl->phash_buckets[hash_val] = n;
641         write_unlock_bh(&tbl->lock);
642 out:
643         return n;
644 }
645 EXPORT_SYMBOL(pneigh_lookup);
646
647
/* Remove and free the proxy entry matching (@pkey, @dev, @net) exactly
 * (unlike lookup, a NULL-device entry does not wildcard here).  The
 * destructor and frees run after dropping tbl->lock.  Returns 0 on
 * success, -ENOENT if no such entry.
 */
648 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
649                   struct net_device *dev)
650 {
651         struct pneigh_entry *n, **np;
652         int key_len = tbl->key_len;
653         u32 hash_val = pneigh_hash(pkey, key_len);
654
655         write_lock_bh(&tbl->lock);
656         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
657              np = &n->next) {
658                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
659                     net_eq(pneigh_net(n), net)) {
660                         *np = n->next;
661                         write_unlock_bh(&tbl->lock);
662                         if (tbl->pdestructor)
663                                 tbl->pdestructor(n);
664                         if (n->dev)
665                                 dev_put(n->dev);
666                         kfree(n);
667                         return 0;
668                 }
669         }
670         write_unlock_bh(&tbl->lock);
671         return -ENOENT;
672 }
673
/* Unlink all proxy entries for @dev (or every entry when @dev is NULL).
 * Called with tbl->lock held for writing and RELEASES it: unlinked
 * entries are collected on a private freelist under the lock, then
 * destructed/freed after unlocking so pdestructor never runs under
 * tbl->lock.  Always returns -ENOENT (callers ignore the value).
 */
674 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
675                                     struct net_device *dev)
676 {
677         struct pneigh_entry *n, **np, *freelist = NULL;
678         u32 h;
679
680         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
681                 np = &tbl->phash_buckets[h];
682                 while ((n = *np) != NULL) {
683                         if (!dev || n->dev == dev) {
684                                 *np = n->next;
685                                 n->next = freelist;
686                                 freelist = n;
687                                 continue;
688                         }
689                         np = &n->next;
690                 }
691         }
692         write_unlock_bh(&tbl->lock);
693         while ((n = freelist)) {
694                 freelist = n->next;
695                 n->next = NULL;
696                 if (tbl->pdestructor)
697                         tbl->pdestructor(n);
698                 if (n->dev)
699                         dev_put(n->dev);
700                 kfree(n);
701         }
702         return -ENOENT;
703 }
704
705 static void neigh_parms_destroy(struct neigh_parms *parms);
706
/* Drop one reference on a parms block, destroying it on the last put. */
707 static inline void neigh_parms_put(struct neigh_parms *parms)
708 {
709         if (refcount_dec_and_test(&parms->refcnt))
710                 neigh_parms_destroy(parms);
711 }
712
713 /*
714  *      neighbour must already be out of the table;
715  *
716  */
/* Free an entry whose last reference was just dropped.  The entry MUST
 * already be unlinked (n->dead set) — a live entry is a bug and is leaked
 * on purpose after a warning.  Final free is RCU-deferred because lockless
 * readers may still hold a pointer from a hash-chain traversal.
 */
717 void neigh_destroy(struct neighbour *neigh)
718 {
719         struct net_device *dev = neigh->dev;
720
721         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
722
723         if (!neigh->dead) {
724                 pr_warn("Destroying alive neighbour %p\n", neigh);
725                 dump_stack();
726                 return;
727         }
728
        /* Timer should have been cancelled when the entry was unlinked. */
729         if (neigh_del_timer(neigh))
730                 pr_warn("Impossible event\n");
731
732         write_lock_bh(&neigh->lock);
733         __skb_queue_purge(&neigh->arp_queue);
734         write_unlock_bh(&neigh->lock);
735         neigh->arp_queue_len_bytes = 0;
736
737         if (dev->netdev_ops->ndo_neigh_destroy)
738                 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
739
740         dev_put(dev);
741         neigh_parms_put(neigh->parms);
742
743         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
744
745         atomic_dec(&neigh->tbl->entries);
746         kfree_rcu(neigh, rcu);
747 }
748 EXPORT_SYMBOL(neigh_destroy);
749
750 /* Neighbour state is suspicious;
751    disable fast path.
752
753    Called with write_locked neigh.
754  */
755 static void neigh_suspect(struct neighbour *neigh)
756 {
        /* Route output through ops->output (slow path with revalidation). */
757         neigh_dbg(2, "neigh %p is suspected\n", neigh);
758
759         neigh->output = neigh->ops->output;
760 }
761
762 /* Neighbour state is OK;
763    enable fast path.
764
765    Called with write_locked neigh.
766  */
767 static void neigh_connect(struct neighbour *neigh)
768 {
        /* Route output through connected_output (fast path, no checks). */
769         neigh_dbg(2, "neigh %p is connected\n", neigh);
770
771         neigh->output = neigh->ops->connected_output;
772 }
773
/* Periodic GC worker: every run it (a) re-randomizes reachable_time for
 * all parms blocks roughly every 300 s, and (b) if the table exceeds
 * gc_thresh1, sweeps each bucket for unreferenced entries that are FAILED
 * or stale past GC_STALETIME and releases them.  Drops and retakes
 * tbl->lock between buckets to bound lock hold time.
 */
774 static void neigh_periodic_work(struct work_struct *work)
775 {
776         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
777         struct neighbour *n;
778         struct neighbour __rcu **np;
779         unsigned int i;
780         struct neigh_hash_table *nht;
781
782         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
783
784         write_lock_bh(&tbl->lock);
785         nht = rcu_dereference_protected(tbl->nht,
786                                         lockdep_is_held(&tbl->lock));
787
788         /*
789          *      periodically recompute ReachableTime from random function
790          */
791
792         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
793                 struct neigh_parms *p;
794                 tbl->last_rand = jiffies;
795                 list_for_each_entry(p, &tbl->parms_list, list)
796                         p->reachable_time =
797                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
798         }
799
800         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
801                 goto out;
802
803         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
804                 np = &nht->hash_buckets[i];
805
806                 while ((n = rcu_dereference_protected(*np,
807                                 lockdep_is_held(&tbl->lock))) != NULL) {
808                         unsigned int state;
809
810                         write_lock(&n->lock);
811
812                         state = n->nud_state;
                        /* Entries the timer owns, or permanent ones, are off-limits. */
813                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
814                                 write_unlock(&n->lock);
815                                 goto next_elt;
816                         }
817
818                         if (time_before(n->used, n->confirmed))
819                                 n->used = n->confirmed;
820
821                         if (refcount_read(&n->refcnt) == 1 &&
822                             (state == NUD_FAILED ||
823                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
824                                 *np = n->next;
825                                 n->dead = 1;
826                                 write_unlock(&n->lock);
827                                 neigh_cleanup_and_release(n);
828                                 continue;
829                         }
830                         write_unlock(&n->lock);
831
832 next_elt:
833                         np = &n->next;
834                 }
835                 /*
836                  * It's fine to release lock here, even if hash table
837                  * grows while we are preempted.
838                  */
839                 write_unlock_bh(&tbl->lock);
840                 cond_resched();
841                 write_lock_bh(&tbl->lock);
842                 nht = rcu_dereference_protected(tbl->nht,
843                                                 lockdep_is_held(&tbl->lock));
844         }
845 out:
846         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
847          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
848          * BASE_REACHABLE_TIME.
849          */
850         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
851                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
852         write_unlock_bh(&tbl->lock);
853 }
854
855 static __inline__ int neigh_max_probes(struct neighbour *n)
856 {
857         struct neigh_parms *p = n->parms;
858         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
859                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
860                 NEIGH_VAR(p, MCAST_PROBES));
861 }
862
/* Mark resolution as failed: report unreachability for every queued skb
 * via ops->error_report, then purge the queue.  Called and returns with
 * neigh->lock write-held, but drops it around each error_report call (as
 * the sparse annotations state), re-checking NUD_FAILED each iteration in
 * case the state changed while unlocked.
 */
863 static void neigh_invalidate(struct neighbour *neigh)
864         __releases(neigh->lock)
865         __acquires(neigh->lock)
866 {
867         struct sk_buff *skb;
868
869         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
870         neigh_dbg(2, "neigh %p is failed\n", neigh);
871         neigh->updated = jiffies;
872
873         /* It is very thin place. report_unreachable is very complicated
874            routine. Particularly, it can hit the same neighbour entry!
875
876            So that, we try to be accurate and avoid dead loop. --ANK
877          */
878         while (neigh->nud_state == NUD_FAILED &&
879                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
880                 write_unlock(&neigh->lock);
881                 neigh->ops->error_report(neigh, skb);
882                 write_lock(&neigh->lock);
883         }
884         __skb_queue_purge(&neigh->arp_queue);
885         neigh->arp_queue_len_bytes = 0;
886 }
887
/* Emit one solicitation for the neighbour and bump the probe counter.
 * Entered with neigh->lock write-held; the lock is RELEASED before
 * calling into the protocol's solicit hook and is not re-taken, so the
 * caller must not assume it still holds the lock on return.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* kfree_skb(NULL) is a no-op, so the clone-failure path is fine. */
	kfree_skb(skb);
}
901
902 /* Called when a timer expires for a neighbour entry. */
903
/* Per-neighbour timer callback driving the NUD state machine:
 * REACHABLE may decay to DELAY or STALE, DELAY either confirms back to
 * REACHABLE or escalates to PROBE, and INCOMPLETE/PROBE retransmit
 * until neigh_max_probes() is exceeded, at which point the entry is
 * failed and its queued packets are flushed.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Timer may have raced with a state change that no longer needs it. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but not confirmed: give the peer a
			 * grace period (NUD_DELAY) before actively probing.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp the re-arm interval so we never spin faster than HZ/2. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	/* Drop the reference the timer held on this entry. */
	neigh_release(neigh);
}
989
/* Kick off (or continue) address resolution for @neigh on behalf of an
 * outgoing @skb.  Returns 0 if the caller may transmit immediately
 * (entry usable, or resolution already in flight elsewhere) and 1 if
 * the skb was consumed here — either queued on arp_queue awaiting
 * resolution or freed because resolution is impossible.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Usable or already being verified: nothing for us to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Start fresh resolution: pre-charge the unicast
			 * budget so only mcast/app probes remain, then arm
			 * the retransmit timer and probe right away.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry being used again: enter DELAY to give passive
		 * confirmation a chance before probing.
		 */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Enforce the per-entry byte limit by discarding the
			 * oldest queued packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* The lock was taken with write_lock_bh(); drop the spinlock part
	 * first (neigh_probe() does it internally) and re-enable BHs last.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
1068 EXPORT_SYMBOL(__neigh_event_send);
1069
/* Propagate a changed link-layer address into the neighbour's cached
 * hardware header.  Only acts when the device's header_ops supply a
 * cache_update hook and the header cache was actually initialized
 * (hh_len != 0); the rewrite is done under the hh seqlock so fast-path
 * readers see a consistent header.
 */
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		/* READ_ONCE pairs with the locked initialization of hh_len. */
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
1088
1089
1090
1091 /* Generic update routine.
1092    -- lladdr is new lladdr or NULL, if it is not supplied.
1093    -- new    is new state.
1094    -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.
1106
1107    Caller MUST hold reference count on the entry.
1108  */
1109
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* NOARP/PERMANENT entries may only be changed administratively. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	/* Transition to an invalid state (e.g. NUD_FAILED): no lladdr
	 * handling needed, just tear down the timer and notify.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but demote the
				 * entry so it gets re-verified.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* New hardware address: publish it under the ha seqlock and
		 * refresh any cached hardware headers derived from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate 'confirmed' so a non-connected entry expires soon. */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		/* Entry just became valid: flush packets that were queued
		 * while it was unresolved.
		 */
		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
1281 EXPORT_SYMBOL(neigh_update);
1282
1283 /* Update the neigh to listen temporarily for probe responses, even if it is
1284  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1285  */
1286 void __neigh_set_probe_once(struct neighbour *neigh)
1287 {
1288         if (neigh->dead)
1289                 return;
1290         neigh->updated = jiffies;
1291         if (!(neigh->nud_state & NUD_FAILED))
1292                 return;
1293         neigh->nud_state = NUD_INCOMPLETE;
1294         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1295         neigh_add_timer(neigh,
1296                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1297 }
1298 EXPORT_SYMBOL(__neigh_set_probe_once);
1299
1300 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1301                                  u8 *lladdr, void *saddr,
1302                                  struct net_device *dev)
1303 {
1304         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1305                                                  lladdr || !dev->addr_len);
1306         if (neigh)
1307                 neigh_update(neigh, lladdr, NUD_STALE,
1308                              NEIGH_UPDATE_F_OVERRIDE, 0);
1309         return neigh;
1310 }
1311 EXPORT_SYMBOL(neigh_event_ns);
1312
/* Lazily fill in the neighbour's cached hardware header via the
 * device's header_ops->cache hook.  Takes n->lock for writing itself so
 * that concurrent callers initialize the hh_cache exactly once.
 * NOTE(review): the historical "called with read_lock_bh" note did not
 * match the lock taken below — callers appear to invoke this unlocked;
 * confirm against neigh_resolve_output().
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1330
1331 /* Slow and careful. */
1332
/* Output callback for entries that may still need resolution.  If
 * neigh_event_send() says the entry is usable, build the link-layer
 * header (initializing the hh cache on first use) and transmit;
 * otherwise the skb was queued/consumed by the resolution machinery
 * and 0 is returned.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		/* Retry header construction until it observes a stable
		 * neigh->ha (guarded by the ha_lock seqlock).
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
1365
1366 /* As fast as possible without hh cache */
1367
/* Output callback for connected (verified) entries: no resolution step,
 * just build the link-layer header from the cached address and send.
 * Returns the dev_queue_xmit() result, or -EINVAL (skb freed) if the
 * header could not be built.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	/* seqlock retry loop: redo the header if neigh->ha changed
	 * underneath us while we were copying it.
	 */
	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
1390
/* Output callback for devices that need no link-layer resolution:
 * hand the skb straight to the device transmit queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
1396
/* Timer callback for delayed proxy replies: walk the table's
 * proxy_queue, re-process (proxy_redo) every skb whose scheduled time
 * has arrived, and re-arm the timer for the earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* tdif <= 0 means this skb's scheduled time has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Balances the dev_hold() taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1430
/* Queue @skb for a delayed proxy reply, randomly jittered within
 * PROXY_DELAY, and (re)arm the proxy timer to fire no later than this
 * skb's deadline.  Drops the skb if the queue already exceeds
 * PROXY_QLEN.
 * NOTE(review): sched_next uses prandom_u32() % PROXY_DELAY — assumes
 * PROXY_DELAY is configured non-zero; confirm sysctl guarantees this.
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	/* NOTE(review): qlen is read before taking proxy_queue.lock, so
	 * the limit check is racy — presumably tolerated as best-effort.
	 */
	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* If the timer was pending earlier than our deadline, keep the
	 * earlier expiry.
	 */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	/* Hold the device while the skb sits in the queue; released in
	 * neigh_proxy_process().
	 */
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1459
1460 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1461                                                       struct net *net, int ifindex)
1462 {
1463         struct neigh_parms *p;
1464
1465         list_for_each_entry(p, &tbl->parms_list, list) {
1466                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1467                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1468                         return p;
1469         }
1470
1471         return NULL;
1472 }
1473
/* Allocate per-device neighbour parameters for @dev, cloned from the
 * table's defaults, give the driver a chance to tweak them via
 * ndo_neigh_setup, and link them onto the table's parms list.
 * Returns the new parms (refcount 1, holding a reference on @dev) or
 * NULL on allocation/driver failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Start from a copy of the table-wide defaults. */
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Let the driver veto/adjust; unwind on failure. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
1507
1508 static void neigh_rcu_free_parms(struct rcu_head *head)
1509 {
1510         struct neigh_parms *parms =
1511                 container_of(head, struct neigh_parms, rcu_head);
1512
1513         neigh_parms_put(parms);
1514 }
1515
/* Unlink @parms from @tbl and schedule the final reference drop after
 * an RCU grace period.  The table's built-in default parms are never
 * released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	/* Release the device reference taken in neigh_parms_alloc(). */
	if (parms->dev)
		dev_put(parms->dev);
	/* Defer the refcount drop until RCU readers are done. */
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
1529
/* Final free of a neigh_parms — presumably reached from the refcount
 * release path once the last neigh_parms_put() fires (confirm against
 * the neigh_parms_put() definition).
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1534
1535 static struct lock_class_key neigh_table_proxy_queue_class;
1536
1537 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1538
/* One-time initialization of a protocol's neighbour table (ARP, ND, …):
 * set up default parms, statistics, proc entry, hash tables, the
 * periodic GC work and the proxy timer, then publish the table in the
 * global neigh_tables[] array at @index.  Allocation failures panic —
 * this runs at boot/module init where the table is mandatory.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial neighbour hash: 2^3 = 8 buckets, grown on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
1588 EXPORT_SYMBOL(neigh_table_init);
1589
/* Tear down a neighbour table on protocol/module unload: unpublish it,
 * stop the GC work and proxy timer, purge queued skbs, drop all
 * entries, then free the hash tables, proc entry and statistics.
 * Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	/* Unpublish first so no new lookups find the table. */
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL);
	neigh_ifdown(tbl, NULL);
	/* Any remaining entries indicate a refcount leak somewhere. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
1616
1617 static struct neigh_table *neigh_find_table(int family)
1618 {
1619         struct neigh_table *tbl = NULL;
1620
1621         switch (family) {
1622         case AF_INET:
1623                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1624                 break;
1625         case AF_INET6:
1626                 tbl = neigh_tables[NEIGH_ND_TABLE];
1627                 break;
1628         }
1629
1630         return tbl;
1631 }
1632
/* RTM_DELNEIGH handler: delete a (p)neighbour entry described by the
 * netlink message.  Proxy entries (NTF_PROXY) go through
 * pneigh_delete(); normal entries are administratively forced to
 * NUD_FAILED and then unlinked from the table.  Runs under RTNL.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* Reject truncated keys before using them for lookup. */
	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Force the entry into FAILED (ADMIN overrides NOARP/PERMANENT),
	 * then drop our lookup reference and unlink it under tbl->lock.
	 */
	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1694
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry.
 *
 * Runs under RTNL.  Honours the standard netlink flags:
 *   NLM_F_CREATE  - required to create an entry that does not exist yet;
 *   NLM_F_EXCL    - fail with -EEXIST if the entry already exists;
 *   NLM_F_REPLACE - without it, an existing entry's lladdr/state are not
 *                   overridden (NEIGH_UPDATE_F_OVERRIDE is cleared).
 * NTF_PROXY requests manipulate the proxy table instead, and NTF_USE
 * merely kicks resolution via neigh_event_send() without updating state.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must cover the device's
		 * full address length.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		/* pneigh_lookup() with create == 1 allocates on demand. */
		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	/* Non-proxy entries always need a device. */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		/* Just trigger resolution; do not alter the entry. */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				   NETLINK_CB(skb).portid);
	neigh_release(neigh);

out:
	return err;
}
1787
/* Emit one NDTA_PARMS nested attribute describing @parms.
 *
 * All tunables are serialised in a single short-circuiting chain; if any
 * nla_put*() fails the nest is cancelled and -EMSGSIZE returned so the
 * caller can retry with a larger skb.  NDTPA_IFINDEX is only present
 * for per-device parms (parms->dev != NULL).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1835
/* Fill one RTM_NEWNEIGHTBL message fully describing @tbl: name, GC
 * thresholds/interval, an NDTA_CONFIG snapshot, aggregated per-CPU
 * statistics (NDTA_STATS) and the table's default parms (NDTA_PARMS).
 *
 * tbl->lock is held for reading across the whole fill so the values
 * form a consistent snapshot.  Returns 0 or -EMSGSIZE if the skb is
 * too small (message is cancelled in that case).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* Runtime configuration snapshot; hash parameters are read
		 * under RCU since the hash table may be resized concurrently.
		 */
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* Sum the per-CPU statistics into one ndt_stats record. */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table's default parms must never be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1924
/* Fill a compact RTM_NEWNEIGHTBL message carrying only the table name
 * and one NDTA_PARMS set (used when dumping per-device parms).  Holds
 * tbl->lock for reading; returns 0 or -EMSGSIZE.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1957
/* Netlink attribute validation policy for RTM_*NEIGHTBL messages. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1966
/* Validation policy for the attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1983
/* RTM_SETNEIGHTBL handler: update a neighbour table's tunables.
 *
 * The table is selected by NDTA_NAME (mandatory), optionally narrowed
 * by ndtm_family.  Per-parms tunables arrive nested in NDTA_PARMS and
 * are applied to the parms set selected by NDTPA_IFINDEX (0 = table
 * default).  The GC thresholds/interval may only be changed from the
 * initial network namespace.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Locate the table by name (and family, if one was given). */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply each supplied tunable to the selected parms set. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count; converted to the
				 * byte-based limit using a typical frame size.
				 */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* GC knobs are global: refuse them outside the init namespace. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2144
/* Dump all neighbour tables (and their per-device parms) matching the
 * requested family as RTM_NEWNEIGHTBL messages.
 *
 * Resume state across dump calls lives in cb->args[0] (table index)
 * and cb->args[1] (parms index within the table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		/* Walk the per-device parms, skipping the default entry
		 * (already covered by neightbl_fill_info above) and any
		 * parms from other network namespaces.
		 */
		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2197
/* Fill one RTM_NEWNEIGH (or similar) message describing @neigh.
 *
 * State, hardware address and cache timestamps are sampled under
 * neigh->lock so they form a consistent snapshot; the lladdr is only
 * reported while the entry is in a NUD_VALID state.  Returns 0 or
 * -EMSGSIZE (message cancelled).
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* Snapshot the hardware address so it cannot change while
		 * being copied into the message.
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Ages are reported relative to now; refcnt excludes the lookup
	 * reference held by the caller (hence the "- 1").
	 */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2250
2251 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2252                             u32 pid, u32 seq, int type, unsigned int flags,
2253                             struct neigh_table *tbl)
2254 {
2255         struct nlmsghdr *nlh;
2256         struct ndmsg *ndm;
2257
2258         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2259         if (nlh == NULL)
2260                 return -EMSGSIZE;
2261
2262         ndm = nlmsg_data(nlh);
2263         ndm->ndm_family  = tbl->family;
2264         ndm->ndm_pad1    = 0;
2265         ndm->ndm_pad2    = 0;
2266         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2267         ndm->ndm_type    = RTN_UNICAST;
2268         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2269         ndm->ndm_state   = NUD_NONE;
2270
2271         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2272                 goto nla_put_failure;
2273
2274         nlmsg_end(skb, nlh);
2275         return 0;
2276
2277 nla_put_failure:
2278         nlmsg_cancel(skb, nlh);
2279         return -EMSGSIZE;
2280 }
2281
/* Announce a neighbour update: fire the NETEVENT_NEIGH_UPDATE notifier
 * chain, then send an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2287
2288 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2289 {
2290         struct net_device *master;
2291
2292         if (!master_idx)
2293                 return false;
2294
2295         master = netdev_master_upper_dev_get(dev);
2296         if (!master || master->ifindex != master_idx)
2297                 return true;
2298
2299         return false;
2300 }
2301
2302 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2303 {
2304         if (filter_idx && dev->ifindex != filter_idx)
2305                 return true;
2306
2307         return false;
2308 }
2309
/* Dump the regular (non-proxy) entries of @tbl as RTM_NEWNEIGH messages.
 *
 * Optional NDA_IFINDEX / NDA_MASTER request attributes restrict the
 * dump to one device or one master; when either is set the messages
 * carry NLM_F_DUMP_FILTERED.  The hash is walked under rcu_read_lock_bh
 * and the resume position is kept in cb->args[1] (bucket) and
 * cb->args[2] (index within bucket).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* Parse optional filter attributes; a parse failure simply means
	 * no filtering (err is not propagated).
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter_idx) ||
			    neigh_master_filtered(n->dev, filter_master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				/* skb full: stop and resume next call. */
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2372
/* Dump the proxy entries of @tbl as RTM_NEWNEIGH messages, under the
 * table read lock.  Resume position is kept in cb->args[3] (bucket)
 * and cb->args[4] (index within bucket).
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				/* skb full: stop and resume next call. */
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2410
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables matching
 * the requested family and dump either the proxy entries (when the
 * request's ndm_flags equal NTF_PROXY) or the regular entries.
 * cb->args[0] records the table to resume from; args[1..] belong to
 * the per-table dump helpers and are reset when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2450
/* Invoke @cb(neighbour, @cookie) on every entry of @tbl.
 *
 * Runs under rcu_read_lock_bh() plus a read_lock on tbl->lock (the
 * latter only to prevent a hash resize mid-walk), so @cb executes in
 * atomic, BH-disabled context and must not sleep or modify the table.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2472
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and ask @cb whether each neighbour should be
 * released.  Entries for which @cb returns non-zero are unlinked from
 * their chain, marked dead, and passed to neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink under n->lock; RCU readers may
				 * still traverse n until a grace period.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2507
/* Transmit @skb to @addr out of @dev using the neigh table selected by
 * @index.  For real tables (index < NEIGH_NR_TABLES) the neighbour is
 * looked up (created unresolved on demand) and its output method used.
 * NEIGH_LINK_TABLE means no resolution is needed: build the link-layer
 * header directly and queue the skb.  (NEIGH_LINK_TABLE is presumably
 * defined outside the < NEIGH_NR_TABLES range — see the enum in
 * net/neighbour.h — otherwise the else-branch would be unreachable.)
 *
 * Consumes @skb on failure.  Returns negative errno on error.
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		if (index == NEIGH_ARP_TABLE) {
			/* IPv4 keys fit in a u32: use the fast lookup. */
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2551
2552 #ifdef CONFIG_PROC_FS
2553
2554 static struct neighbour *neigh_get_first(struct seq_file *seq)
2555 {
2556         struct neigh_seq_state *state = seq->private;
2557         struct net *net = seq_file_net(seq);
2558         struct neigh_hash_table *nht = state->nht;
2559         struct neighbour *n = NULL;
2560         int bucket = state->bucket;
2561
2562         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2563         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2564                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2565
2566                 while (n) {
2567                         if (!net_eq(dev_net(n->dev), net))
2568                                 goto next;
2569                         if (state->neigh_sub_iter) {
2570                                 loff_t fakep = 0;
2571                                 void *v;
2572
2573                                 v = state->neigh_sub_iter(state, n, &fakep);
2574                                 if (!v)
2575                                         goto next;
2576                         }
2577                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2578                                 break;
2579                         if (n->nud_state & ~NUD_NOARP)
2580                                 break;
2581 next:
2582                         n = rcu_dereference_bh(n->next);
2583                 }
2584
2585                 if (n)
2586                         break;
2587         }
2588         state->bucket = bucket;
2589
2590         return n;
2591 }
2592
/* Advance from @n to the next displayable neighbour, moving on to later
 * hash buckets as needed.  Applies the same netns/sub-iterator/NOARP
 * filters as neigh_get_first().  When @pos is non-NULL it is
 * decremented per entry returned, so seek loops can count down with it.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* Let the sub-iterator consume positions within @n first. */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2640
2641 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2642 {
2643         struct neighbour *n = neigh_get_first(seq);
2644
2645         if (n) {
2646                 --(*pos);
2647                 while (*pos) {
2648                         n = neigh_get_next(seq, n, pos);
2649                         if (!n)
2650                                 break;
2651                 }
2652         }
2653         return *pos ? NULL : n;
2654 }
2655
2656 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2657 {
2658         struct neigh_seq_state *state = seq->private;
2659         struct net *net = seq_file_net(seq);
2660         struct neigh_table *tbl = state->tbl;
2661         struct pneigh_entry *pn = NULL;
2662         int bucket = state->bucket;
2663
2664         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2665         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2666                 pn = tbl->phash_buckets[bucket];
2667                 while (pn && !net_eq(pneigh_net(pn), net))
2668                         pn = pn->next;
2669                 if (pn)
2670                         break;
2671         }
2672         state->bucket = bucket;
2673
2674         return pn;
2675 }
2676
/* Advance from proxy entry @pn to the next one in this netns, moving on
 * to later buckets as needed.  When @pos is non-NULL it is decremented
 * per entry returned (same convention as neigh_get_next()).
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* Skip entries from other namespaces. */
	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
2704
2705 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2706 {
2707         struct pneigh_entry *pn = pneigh_get_first(seq);
2708
2709         if (pn) {
2710                 --(*pos);
2711                 while (*pos) {
2712                         pn = pneigh_get_next(seq, pn, pos);
2713                         if (!pn)
2714                                 break;
2715                 }
2716         }
2717         return *pos ? NULL : pn;
2718 }
2719
2720 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2721 {
2722         struct neigh_seq_state *state = seq->private;
2723         void *rc;
2724         loff_t idxpos = *pos;
2725
2726         rc = neigh_get_idx(seq, &idxpos);
2727         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2728                 rc = pneigh_get_idx(seq, &idxpos);
2729
2730         return rc;
2731 }
2732
/* Common seq_file ->start() for neighbour tables (/proc/net/arp etc.).
 *
 * Takes rcu_read_lock_bh() and read_lock(tbl->lock); both are held
 * until neigh_seq_stop() — the sparse annotations document this.
 * Returns SEQ_START_TOKEN at position 0 (header line), otherwise the
 * entry at *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iterator-internal state; callers may not set it. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2750
/* Common seq_file ->next(): iterate regular neighbours first, then
 * (unless NEIGH_SEQ_NEIGH_ONLY) fall through to the proxy entries.
 * NEIGH_SEQ_IS_PNEIGH in state->flags records which phase we are in.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Regular neighbours exhausted: switch to proxies. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2777
/* Common seq_file ->stop(): release the locks taken in
 * neigh_seq_start(), in reverse acquisition order.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2789
2790 /* statistics via seq_file */
2791
/* ->start() for the per-table stats file: *pos == 0 yields the header
 * token; *pos == N+1 yields the stats of possible CPU N.  *pos is set
 * to cpu+1 so a subsequent call resumes after the CPU just returned.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
2808
/* ->next(): return the next possible CPU's stats, leaving *pos at
 * cpu+1.  The trailing increment ensures *pos still advances when we
 * hit the end and return NULL (seq_file expects the position to move).
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	(*pos)++;
	return NULL;
}
2823
/* ->stop(): nothing to release — start() takes no locks here. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2828
/* ->show(): print the column header for the start token, otherwise one
 * line of per-CPU counters (plus the global entry count, repeated on
 * every line).
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2863
/* seq_file operations for the per-table statistics file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2870
2871 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2872 {
2873         int ret = seq_open(file, &neigh_stat_seq_ops);
2874
2875         if (!ret) {
2876                 struct seq_file *sf = file->private_data;
2877                 sf->private = PDE_DATA(inode);
2878         }
2879         return ret;
2880 };
2881
/* File operations for /proc/net/stat/<table> entries. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2889
2890 #endif /* CONFIG_PROC_FS */
2891
/* Worst-case netlink payload size for a neighbour notification message;
 * used to size the skb allocated in __neigh_notify().
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2900
/* Send a netlink notification about neighbour @n to RTNLGRP_NEIGH
 * listeners in its netns.  Best effort: if the skb cannot be allocated
 * or filled, the multicast group's socket error is set instead.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2925
/* Notify userspace resolvers about @n by multicasting an RTM_GETNEIGH
 * request to RTNLGRP_NEIGH (presumably consumed by app-level ARP/ND
 * daemons — see callers for the triggering policy).
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2931
2932 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the sysctl handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen so the packets->bytes conversion cannot overflow. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2936
/* Handler for the legacy "unres_qlen" sysctl: the backing value is
 * stored in bytes (QUEUE_LEN_BYTES) while userspace reads and writes a
 * packet count, so convert with SKB_TRUESIZE(ETH_FRAME_LEN) both ways.
 * The division rounds down, so readback may differ from what was
 * written via the byte-based knob.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	/* Operate on a stack copy so the shared table isn't modified. */
	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2954
2955 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2956                                                    int family)
2957 {
2958         switch (family) {
2959         case AF_INET:
2960                 return __in_dev_arp_parms_get_rcu(dev);
2961         case AF_INET6:
2962                 return __in6_dev_nd_parms_get_rcu(dev);
2963         }
2964         return NULL;
2965 }
2966
/* A "default" parm changed: copy data[index] to every device in @net
 * whose per-device parms have not been explicitly written (data_state
 * bit clear) for that index, so untouched devices track the default.
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
2983
/* Post-write hook shared by all neigh sysctl handlers: mark the field
 * as explicitly set (so default propagation skips it), fire the
 * netevent consumers care about for delay_probe_time, and — when this
 * is the per-table "default" entry (no device) — push the new value to
 * all devices in the netns.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	/* ctl->data points into p->data[]; recover the index. */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
3000
/* proc_dointvec_minmax clamped to [0, INT_MAX], followed by the common
 * neigh sysctl write propagation.
 */
static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	/* Set the bounds on a stack copy; the shared table stays const. */
	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}
3015
3016 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3017                         void __user *buffer, size_t *lenp, loff_t *ppos)
3018 {
3019         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3020
3021         neigh_proc_update(ctl, write);
3022         return ret;
3023 }
3024 EXPORT_SYMBOL(neigh_proc_dointvec);
3025
3026 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3027                                 void __user *buffer,
3028                                 size_t *lenp, loff_t *ppos)
3029 {
3030         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3031
3032         neigh_proc_update(ctl, write);
3033         return ret;
3034 }
3035 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3036
3037 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3038                                               void __user *buffer,
3039                                               size_t *lenp, loff_t *ppos)
3040 {
3041         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3042
3043         neigh_proc_update(ctl, write);
3044         return ret;
3045 }
3046
3047 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3048                                    void __user *buffer,
3049                                    size_t *lenp, loff_t *ppos)
3050 {
3051         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3052
3053         neigh_proc_update(ctl, write);
3054         return ret;
3055 }
3056 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3057
3058 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3059                                           void __user *buffer,
3060                                           size_t *lenp, loff_t *ppos)
3061 {
3062         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3063
3064         neigh_proc_update(ctl, write);
3065         return ret;
3066 }
3067
/* Handler shared by the base_reachable_time and base_reachable_time_ms
 * sysctls: dispatch to the matching jiffies handler by procname, then
 * refresh the derived reachable_time immediately.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}
3092
/* Offset-of trick: yields the address the data[index] slot would have
 * in a neigh_parms at address 0; neigh_sysctl_register() later adds the
 * real parms pointer to turn it into a usable ->data pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot; @data_attr may differ from @attr for the
 * legacy aliases that share storage with a newer variable.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Convenience wrappers selecting the proc handler per value type. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3122
/* Template for one parms instance's sysctl directory; kmemdup'ed and
 * fixed up (data pointers, handlers, truncation for per-device tables)
 * by neigh_sysctl_register().
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* The GC knobs below exist only in the "default" table;
		 * their ->data is patched to the neigh_table fields at
		 * registration time.
		 */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3177
/* Register net/<ipv4|ipv6>/neigh/<dev|default> sysctls for @p.
 *
 * The template table is duplicated and fixed up: per-parms entries get
 * real data pointers (template stores offsets), per-device tables are
 * truncated before the GC knobs, and @handler — if supplied — replaces
 * the retrans_time/base_reachable_time(_ms) handlers.
 *
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Turn the template's offsets into pointers into p->data[]. */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3266
/* Undo neigh_sysctl_register() for @p: unregister the sysctl header and
 * free the duplicated table.  No-op if nothing was registered.
 */
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		/* Detach from the parms before tearing the table down. */
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);
3277
3278 #endif  /* CONFIG_SYSCTL */
3279
/* Register the rtnetlink handlers for neighbour and neigh-table
 * management (add/delete/dump entries, dump/tune tables).
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);
3294