/* GNU Linux-libre 4.9.317-gnu1 — net/core/neighbour.c */
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
46 #define DEBUG
47 #define NEIGH_DEBUG 1
48 #define neigh_dbg(level, fmt, ...)              \
49 do {                                            \
50         if (level <= NEIGH_DEBUG)               \
51                 pr_debug(fmt, ##__VA_ARGS__);   \
52 } while (0)
53
54 #define PNEIGH_HASHMASK         0xF
55
56 static void neigh_timer_handler(unsigned long arg);
57 static void __neigh_notify(struct neighbour *n, int type, int flags);
58 static void neigh_update_notify(struct neighbour *neigh);
59 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
60                                     struct net_device *dev);
61
62 #ifdef CONFIG_PROC_FS
63 static const struct file_operations neigh_stat_seq_fops;
64 #endif
65
66 /*
67    Neighbour hash table buckets are protected with rwlock tbl->lock.
68
69    - All the scans/updates to hash buckets MUST be made under this lock.
70    - NOTHING clever should be made under this lock: no callbacks
71      to protocol backends, no attempts to send something to network.
72      It will result in deadlocks, if backend/driver wants to use neighbour
73      cache.
74    - If the entry requires some non-trivial actions, increase
75      its reference count and release table lock.
76
77    Neighbour entries are protected:
78    - with reference count.
79    - with rwlock neigh->lock
80
81    Reference count prevents destruction.
82
83    neigh->lock mainly serializes ll address data and its validity state.
84    However, the same lock is used to protect another entry fields:
85     - timer
86     - resolution queue
87
88    Again, nothing clever shall be made under neigh->lock,
89    the most complicated procedure, which we allow is dev->hard_header.
90    It is supposed, that dev->hard_header is simplistic and does
91    not make callbacks to neighbour tables.
92  */
93
/* Output handler installed on entries that must not transmit (dead or
 * unresolvable neighbours): drop the skb and report the link as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
        kfree_skb(skb);
        return -ENETDOWN;
}
99
/* Final teardown for an entry removed from its hash chain: run the
 * per-parms cleanup hook (if any), send an RTM_DELNEIGH notification,
 * and drop the table's reference (which may free the entry).
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
        if (neigh->parms->neigh_cleanup)
                neigh->parms->neigh_cleanup(neigh);

        __neigh_notify(neigh, RTM_DELNEIGH, 0);
        neigh_release(neigh);
}
108
109 /*
110  * It is random distribution in the interval (1/2)*base...(3/2)*base.
111  * It corresponds to default IPv6 settings and is not overridable,
112  * because it is really reasonable choice.
113  */
114
/* Pick a randomized reachable time in [base/2, 3*base/2), as the
 * comment above describes; a zero base yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;
        return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
121
/* Synchronous garbage collection pass over the whole hash table,
 * called (from neigh_alloc) when the table is over its gc thresholds.
 * Evicts every entry that only the table still references
 * (refcnt == 1) and that is not NUD_PERMANENT.
 *
 * Returns 1 if at least one entry was freed, 0 otherwise.
 * Takes tbl->lock for the whole scan; each victim is additionally
 * marked dead under its own lock before being released.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Neighbour record may be discarded if:
                         * - nobody refers to it.
                         * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
                                /* Unlink from the chain; readers under RCU
                                 * may still see the old entry until grace
                                 * period expires.
                                 */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
        }

        /* Remember when we last scanned so neigh_alloc can rate-limit. */
        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
167
/* Arm the per-entry timer for expiry at @when, taking a reference that
 * the timer handler (or neigh_del_timer) is responsible for dropping.
 * mod_timer() returning nonzero means the timer was already pending,
 * which should never happen here — hence the loud complaint.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
        neigh_hold(n);
        if (unlikely(mod_timer(&n->timer, when))) {
                printk("NEIGH: BUG, double timer add, state is %x\n",
                       n->nud_state);
                dump_stack();
        }
}
177
178 static int neigh_del_timer(struct neighbour *n)
179 {
180         if ((n->nud_state & NUD_IN_TIMER) &&
181             del_timer(&n->timer)) {
182                 neigh_release(n);
183                 return 1;
184         }
185         return 0;
186 }
187
188 static void pneigh_queue_purge(struct sk_buff_head *list)
189 {
190         struct sk_buff *skb;
191
192         while ((skb = skb_dequeue(list)) != NULL) {
193                 dev_put(skb->dev);
194                 kfree_skb(skb);
195         }
196 }
197
/* Remove every entry belonging to @dev (or all entries if @dev is NULL)
 * from the hash table.  Caller must hold tbl->lock (write).  Entries
 * still referenced elsewhere are neutralized in place — blackholed,
 * timers killed, queue purged — and destroyed later when the last
 * reference goes away.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        /* Unlink before taking n->lock; RCU readers may
                         * still traverse the old pointer.
                         */
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                __skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                neigh_dbg(2, "neigh %p is stray\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
247
/* Flush all cached neighbours for @dev after its hardware address
 * changed; cached link-layer addresses are no longer usable.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
255
/* Device is going down: flush its neighbour entries and its proxy
 * entries, then stop the proxy timer and drain the proxy queue.
 * Note pneigh_ifdown_and_unlock() releases tbl->lock on our behalf.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown_and_unlock(tbl, dev);

        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
267
/* Allocate and minimally initialize a neighbour entry for @tbl/@dev.
 * Before allocating, runs forced GC when the table is over gc_thresh3,
 * or over gc_thresh2 and not flushed within the last 5 seconds; fails
 * with NULL if GC cannot bring the count below gc_thresh3.
 *
 * The new entry starts with refcnt 1 and dead = 1 — it is not yet in
 * the hash table; __neigh_create() clears 'dead' on insertion.
 * Returns NULL on allocation failure or table overflow.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
        struct neighbour *n = NULL;
        unsigned long now = jiffies;
        int entries;

        /* Reserve our slot in the entry count up front; released via
         * out_entries on any failure path.
         */
        entries = atomic_inc_return(&tbl->entries) - 1;
        if (entries >= tbl->gc_thresh3 ||
            (entries >= tbl->gc_thresh2 &&
             time_after(now, tbl->last_flush + 5 * HZ))) {
                if (!neigh_forced_gc(tbl) &&
                    entries >= tbl->gc_thresh3) {
                        net_info_ratelimited("%s: neighbor table overflow!\n",
                                             tbl->id);
                        NEIGH_CACHE_STAT_INC(tbl, table_fulls);
                        goto out_entries;
                }
        }

        /* entry_size covers the protocol key; neigh_priv_len is extra
         * room the device requested.
         */
        n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
        if (!n)
                goto out_entries;

        __skb_queue_head_init(&n->arp_queue);
        rwlock_init(&n->lock);
        seqlock_init(&n->ha_lock);
        n->updated        = n->used = now;
        n->nud_state      = NUD_NONE;
        n->output         = neigh_blackhole;
        seqlock_init(&n->hh.hh_lock);
        n->parms          = neigh_parms_clone(&tbl->parms);
        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

        NEIGH_CACHE_STAT_INC(tbl, allocs);
        n->tbl            = tbl;
        atomic_set(&n->refcnt, 1);
        n->dead           = 1;      /* not hashed yet */
out:
        return n;

out_entries:
        atomic_dec(&tbl->entries);
        goto out;
}
312
/* Fill *x with random bits for the hash function; forcing the low bit
 * on guarantees the value is nonzero (and odd).
 */
static void neigh_get_hash_rnd(u32 *x)
{
        get_random_bytes(x, sizeof(*x));
        *x |= 1;
}
318
/* Allocate a zeroed hash table with 2^shift buckets and fresh hash
 * randomization.  Bucket arrays larger than a page come straight from
 * the page allocator (with a manual kmemleak annotation, since kmemleak
 * does not track __get_free_pages).  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
        size_t size = (1 << shift) * sizeof(struct neighbour *);
        struct neigh_hash_table *ret;
        struct neighbour __rcu **buckets;
        int i;

        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (!ret)
                return NULL;
        if (size <= PAGE_SIZE) {
                buckets = kzalloc(size, GFP_ATOMIC);
        } else {
                buckets = (struct neighbour __rcu **)
                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                           get_order(size));
                kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
        }
        if (!buckets) {
                kfree(ret);
                return NULL;
        }
        ret->hash_buckets = buckets;
        ret->hash_shift = shift;
        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
                neigh_get_hash_rnd(&ret->hash_rnd[i]);
        return ret;
}
347
/* RCU callback freeing a retired hash table; the free path must mirror
 * the allocation path in neigh_hash_alloc() (kzalloc vs. page
 * allocator, including the kmemleak bookkeeping).
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
        struct neigh_hash_table *nht = container_of(head,
                                                    struct neigh_hash_table,
                                                    rcu);
        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
        struct neighbour __rcu **buckets = nht->hash_buckets;

        if (size <= PAGE_SIZE) {
                kfree(buckets);
        } else {
                kmemleak_free(buckets);
                free_pages((unsigned long)buckets, get_order(size));
        }
        kfree(nht);
}
364
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry (the new table has fresh hash_rnd, so positions change).
 * Caller must hold tbl->lock (write).  On allocation failure the old
 * table is kept and returned.  The old table is freed after an RCU
 * grace period so lockless readers can finish with it.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        /* tbl->hash returns a full 32-bit value; keep
                         * the top hash_shift bits as the bucket index.
                         */
                        hash >>= (32 - new_nht->hash_shift);
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        /* Push onto the head of the new bucket. */
                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
405
/* Look up the entry for @pkey on @dev and return it with an extra
 * reference, or NULL.  Lockless: the lookup runs under rcu_read_lock_bh
 * and atomic_inc_not_zero guards against racing with the entry's final
 * release.  Note the 'hits' statistic is bumped even when that refcount
 * race makes us return NULL.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
                               struct net_device *dev)
{
        struct neighbour *n;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        n = __neigh_lookup_noref(tbl, pkey, dev);
        if (n) {
                if (!atomic_inc_not_zero(&n->refcnt))
                        n = NULL;
                NEIGH_CACHE_STAT_INC(tbl, hits);
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup);
425
/* Like neigh_lookup() but matches on key and network namespace only,
 * ignoring the device (the hash is computed with dev == NULL).
 * Returns the entry with an extra reference, or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
                                     const void *pkey)
{
        struct neighbour *n;
        int key_len = tbl->key_len;
        u32 hash_val;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, lookups);

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);
        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
             n != NULL;
             n = rcu_dereference_bh(n->next)) {
                if (!memcmp(n->primary_key, pkey, key_len) &&
                    net_eq(dev_net(n->dev), net)) {
                        /* Guard against racing with the final release. */
                        if (!atomic_inc_not_zero(&n->refcnt))
                                n = NULL;
                        NEIGH_CACHE_STAT_INC(tbl, hits);
                        break;
                }
        }

        rcu_read_unlock_bh();
        return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
456
/* Create (or find) the entry for @pkey on @dev and insert it into the
 * hash table.  Runs the protocol constructor, the device's
 * ndo_neigh_construct hook, and the parms neigh_setup hook before
 * insertion.  If another CPU inserted an equal entry while we were
 * setting up, that existing entry is returned and ours is released.
 *
 * Returns the entry (with an extra reference if @want_ref) or an
 * ERR_PTR: -ENOBUFS on allocation failure, a constructor's error code,
 * or -EINVAL if the parms block died under us.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(dev, n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* Backdate 'confirmed' so the new entry is not treated as
         * freshly confirmed.
         */
        n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /* Grow the table when load factor exceeds 1. */
        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* Did someone else insert an identical entry while we were not
         * holding the lock?
         */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        n->dead = 0;
        if (want_ref)
                neigh_hold(n);
        /* Publish at the head of the bucket. */
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        neigh_dbg(2, "neigh %p is created\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(__neigh_create);
544
545 static u32 pneigh_hash(const void *pkey, int key_len)
546 {
547         u32 hash_val = *(u32 *)(pkey + key_len - 4);
548         hash_val ^= (hash_val >> 16);
549         hash_val ^= hash_val >> 8;
550         hash_val ^= hash_val >> 4;
551         hash_val &= PNEIGH_HASHMASK;
552         return hash_val;
553 }
554
555 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
556                                               struct net *net,
557                                               const void *pkey,
558                                               int key_len,
559                                               struct net_device *dev)
560 {
561         while (n) {
562                 if (!memcmp(n->key, pkey, key_len) &&
563                     net_eq(pneigh_net(n), net) &&
564                     (n->dev == dev || !n->dev))
565                         return n;
566                 n = n->next;
567         }
568         return NULL;
569 }
570
/* Lockless proxy-entry lookup; the caller is responsible for whatever
 * synchronization the chain walk needs.  Returns the entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
581
/* Look up a proxy entry, optionally creating it when @creat is set.
 * The lookup is done under the table read lock; creation requires the
 * RTNL (asserted) and allocates with GFP_KERNEL, so this must not be
 * called from atomic context with @creat.  Returns the entry or NULL
 * (not found, allocation failure, or pconstructor failure).
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
                                    struct net *net, const void *pkey,
                                    struct net_device *dev, int creat)
{
        struct pneigh_entry *n;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        read_lock_bh(&tbl->lock);
        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                              net, pkey, key_len, dev);
        read_unlock_bh(&tbl->lock);

        if (n || !creat)
                goto out;

        ASSERT_RTNL();

        /* Key bytes live directly behind the struct. */
        n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
        if (!n)
                goto out;

        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
        if (dev)
                dev_hold(dev);

        if (tbl->pconstructor && tbl->pconstructor(n)) {
                if (dev)
                        dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
        }

        /* Insert at the head of the chain under the write lock. */
        write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        tbl->phash_buckets[hash_val] = n;
        write_unlock_bh(&tbl->lock);
out:
        return n;
}
EXPORT_SYMBOL(pneigh_lookup);
626
627
/* Remove and free the proxy entry matching key/dev/namespace exactly
 * (no wildcard-device match here, unlike lookup).  The destructor and
 * free run after the table lock is dropped.  Returns 0 on success,
 * -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
{
        struct pneigh_entry *n, **np;
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        write_lock_bh(&tbl->lock);
        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        *np = n->next;
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
                        if (n->dev)
                                dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
        }
        write_unlock_bh(&tbl->lock);
        return -ENOENT;
}
653
/* Unlink every proxy entry for @dev (all entries if @dev is NULL) onto
 * a private free list, then release tbl->lock — which the CALLER
 * acquired (write_lock_bh) — and destroy the collected entries outside
 * the lock.  The unusual unlock-in-callee contract lets destructors
 * run without holding the table lock.  Always returns -ENOENT.
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                                    struct net_device *dev)
{
        struct pneigh_entry *n, **np, *freelist = NULL;
        u32 h;

        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = *np) != NULL) {
                        if (!dev || n->dev == dev) {
                                *np = n->next;
                                n->next = freelist;
                                freelist = n;
                                continue;
                        }
                        np = &n->next;
                }
        }
        write_unlock_bh(&tbl->lock);
        /* Destroy outside the lock. */
        while ((n = freelist)) {
                freelist = n->next;
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
                if (n->dev)
                        dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
}
684
685 static void neigh_parms_destroy(struct neigh_parms *parms);
686
/* Drop one reference on a parms block; destroy it on the last put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (atomic_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
692
693 /*
694  *      neighbour must already be out of the table;
695  *
696  */
/*
 *      neighbour must already be out of the table;
 *
 */
/* Final destructor, called when the last reference is dropped.  The
 * entry must already be marked dead (unhashed); a live entry is a bug
 * and is leaked on purpose after a warning rather than corrupting the
 * table.  The struct itself is freed after an RCU grace period.
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                pr_warn("Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        if (neigh_del_timer(neigh))
                pr_warn("Impossible event\n");

        write_lock_bh(&neigh->lock);
        __skb_queue_purge(&neigh->arp_queue);
        write_unlock_bh(&neigh->lock);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        neigh_dbg(2, "neigh %p is destroyed\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
729
730 /* Neighbour state is suspicious;
731    disable fast path.
732
733    Called with write_locked neigh.
734  */
/* Entry's reachability is in doubt: route output through the generic
 * (slow, resolving) path.  Caller holds neigh->lock for writing.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is suspected\n", neigh);

        neigh->output = neigh->ops->output;
}
741
742 /* Neighbour state is OK;
743    enable fast path.
744
745    Called with write_locked neigh.
746  */
/* Entry is known reachable: switch output to the fast connected path.
 * Caller holds neigh->lock for writing.
 */
static void neigh_connect(struct neighbour *neigh)
{
        neigh_dbg(2, "neigh %p is connected\n", neigh);

        neigh->output = neigh->ops->connected_output;
}
753
/* Periodic garbage-collection worker for one table.  Re-randomizes
 * reachable_time for every parms block every 300 seconds, then (if the
 * table holds at least gc_thresh1 entries) walks all buckets evicting
 * unreferenced entries that are NUD_FAILED or idle past GC_STALETIME.
 * Reschedules itself at BASE_REACHABLE_TIME/2 intervals.
 */
static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                list_for_each_entry(p, &tbl->parms_list, list)
                        p->reachable_time =
                                neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }

        /* Below gc_thresh1 the table is small enough to leave alone. */
        if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
                goto out;

        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        /* Evict only if nobody else holds a reference. */
                        if (atomic_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                *np = n->next;
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release lock here, even if hash table
                 * grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                /* The table may have been replaced while unlocked. */
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
out:
        /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
         * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
         * BASE_REACHABLE_TIME.
         */
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                              NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
        write_unlock_bh(&tbl->lock);
}
834
835 static __inline__ int neigh_max_probes(struct neighbour *n)
836 {
837         struct neigh_parms *p = n->parms;
838         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
839                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
840                 NEIGH_VAR(p, MCAST_PROBES));
841 }
842
/* Resolution failed: report unreachability for every queued skb and
 * purge the queue.  Called with neigh->lock held for writing; the lock
 * is dropped around each error_report() call (see comment below), so
 * the state is rechecked each iteration.
 */
static void neigh_invalidate(struct neighbour *neigh)
        __releases(neigh->lock)
        __acquires(neigh->lock)
{
        struct sk_buff *skb;

        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        neigh_dbg(2, "neigh %p is failed\n", neigh);
        neigh->updated = jiffies;

        /* It is very thin place. report_unreachable is very complicated
           routine. Particularly, it can hit the same neighbour entry!

           So that, we try to be accurate and avoid dead loop. --ANK
         */
        while (neigh->nud_state == NUD_FAILED &&
               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                write_unlock(&neigh->lock);
                neigh->ops->error_report(neigh, skb);
                write_lock(&neigh->lock);
        }
        __skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;
}
867
/* Send one solicitation for @neigh and count it in neigh->probes.
 * Called with neigh->lock write-held; the lock is released before the
 * (potentially sleeping-context-unsafe) solicit callback runs.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
881
/* Called when a timer expires for a neighbour entry. */

/* NUD state-machine tick: advance the entry between REACHABLE, DELAY,
 * STALE, PROBE and FAILED based on the confirmed/used timestamps,
 * re-arm the timer when the new state still needs one, and either send
 * a probe or invalidate the entry.  Runs in timer context, hence the
 * plain (non-_bh) write_lock.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	/* Entry left every timed state before this handler ran. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but not confirmed: delay before probing. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* A confirmation arrived while we were delaying. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted without an answer: fail the entry. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never arm the timer closer than HZ/2 to bound reschedules. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);	/* drops neigh->lock */
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
969
/* Kick off (or continue) address resolution for @neigh before the
 * caller transmits @skb.
 *
 * Returns 0 if the caller may transmit immediately; returns 1 if the
 * skb was queued pending resolution or dropped (failure paths free it
 * here).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already usable or being reverified: caller can transmit now. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Begin resolution: pre-charge the unicast probe
			 * budget and arm the retransmit timer.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail right away. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale address may still be used, but schedule a probe. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Keep the queue under QUEUE_LEN_BYTES by dropping
			 * the oldest packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself; either way bh stays
	 * disabled until local_bh_enable() below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1049
1050 static void neigh_update_hhs(struct neighbour *neigh)
1051 {
1052         struct hh_cache *hh;
1053         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1054                 = NULL;
1055
1056         if (neigh->dev->header_ops)
1057                 update = neigh->dev->header_ops->cache_update;
1058
1059         if (update) {
1060                 hh = &neigh->hh;
1061                 if (READ_ONCE(hh->hh_len)) {
1062                         write_seqlock_bh(&hh->hh_lock);
1063                         update(hh, neigh->dev, neigh->ha);
1064                         write_sequnlock_bh(&hh->hh_lock);
1065                 }
1066         }
1067 }
1068
1069
1070
1071 /* Generic update routine.
1072    -- lladdr is new lladdr or NULL, if it is not supplied.
1073    -- new    is new state.
1074    -- flags
1075         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1076                                 if it is different.
1077         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1078                                 lladdr instead of overriding it
1079                                 if it is different.
1080         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1081
1082         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1083                                 NTF_ROUTER flag.
1084         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1085                                 a router.
1086
1087    Caller MUST hold reference count on the entry.
1088  */
1089
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	if (!(new & NUD_VALID)) {
		/* Transition into an invalid state: stop the timer and,
		 * when moving from INCOMPLETE/PROBE to FAILED, flush
		 * queued packets via neigh_invalidate().
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the cached address but demote the
				 * entry instead of overriding.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new hardware address under ha_lock and
		 * refresh any cached hardware headers built from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1262
1263 /* Update the neigh to listen temporarily for probe responses, even if it is
1264  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1265  */
1266 void __neigh_set_probe_once(struct neighbour *neigh)
1267 {
1268         if (neigh->dead)
1269                 return;
1270         neigh->updated = jiffies;
1271         if (!(neigh->nud_state & NUD_FAILED))
1272                 return;
1273         neigh->nud_state = NUD_INCOMPLETE;
1274         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1275         neigh_add_timer(neigh,
1276                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1277 }
1278 EXPORT_SYMBOL(__neigh_set_probe_once);
1279
1280 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1281                                  u8 *lladdr, void *saddr,
1282                                  struct net_device *dev)
1283 {
1284         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1285                                                  lladdr || !dev->addr_len);
1286         if (neigh)
1287                 neigh_update(neigh, lladdr, NUD_STALE,
1288                              NEIGH_UPDATE_F_OVERRIDE);
1289         return neigh;
1290 }
1291 EXPORT_SYMBOL(neigh_event_ns);
1292
/* called with read_lock_bh(&n->lock); */
/* One-time population of the neighbour's cached hardware header via
 * dev->header_ops->cache(); n->lock serializes racing initializers so
 * only the first caller fills the entry.
 * NOTE(review): neigh_resolve_output() below calls this without holding
 * n->lock — the comment above looks stale; verify against callers.
 */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1310
/* Slow and careful. */

/* Output path for not-yet-connected neighbours: trigger resolution if
 * needed, then build the hardware header under the ha_lock seqlock
 * (retrying if the address changes mid-copy) and hand the skb to the
 * device queue.  Returns 0 when the skb was queued for resolution,
 * otherwise the xmit result, or -EINVAL (skb freed) when header
 * construction failed.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Lazily fill the hardware-header cache on first use. */
		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1345
/* As fast as possible without hh cache */

/* Output path for connected neighbours: read neigh->ha under the
 * ha_lock seqlock (retrying if it changes mid-read), build the hardware
 * header and transmit.  Returns the xmit result, or -EINVAL (skb freed)
 * when the header could not be built.
 */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);
1370
/* Output path needing no link-layer header from the neighbour entry:
 * pass the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1376
/* Proxy timer handler: walk tbl->proxy_queue, re-process entries whose
 * scheduled time has arrived through tbl->proxy_redo() (dropping them if
 * the device is down or no redo hook exists), and re-arm the timer for
 * the earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* tdif <= 0 means this entry's deadline has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);	/* ref taken in pneigh_enqueue() */
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1410
1411 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1412                     struct sk_buff *skb)
1413 {
1414         unsigned long now = jiffies;
1415
1416         unsigned long sched_next = now + (prandom_u32() %
1417                                           NEIGH_VAR(p, PROXY_DELAY));
1418
1419         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1420                 kfree_skb(skb);
1421                 return;
1422         }
1423
1424         NEIGH_CB(skb)->sched_next = sched_next;
1425         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1426
1427         spin_lock(&tbl->proxy_queue.lock);
1428         if (del_timer(&tbl->proxy_timer)) {
1429                 if (time_before(tbl->proxy_timer.expires, sched_next))
1430                         sched_next = tbl->proxy_timer.expires;
1431         }
1432         skb_dst_drop(skb);
1433         dev_hold(skb->dev);
1434         __skb_queue_tail(&tbl->proxy_queue, skb);
1435         mod_timer(&tbl->proxy_timer, sched_next);
1436         spin_unlock(&tbl->proxy_queue.lock);
1437 }
1438 EXPORT_SYMBOL(pneigh_enqueue);
1439
1440 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1441                                                       struct net *net, int ifindex)
1442 {
1443         struct neigh_parms *p;
1444
1445         list_for_each_entry(p, &tbl->parms_list, list) {
1446                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1447                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1448                         return p;
1449         }
1450
1451         return NULL;
1452 }
1453
/* Allocate per-device neighbour parameters for @dev by cloning the
 * table defaults, letting the driver adjust them via ndo_neigh_setup(),
 * and linking them into the table's parms list.  Returns the new parms
 * (refcount 1, holding a reference on @dev) or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		/* Give the driver a chance to veto or tune the parms. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1487
/* RCU callback for neigh_parms_release(): drop the list's reference
 * once all concurrent readers are done.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1495
/* Unlink @parms from its table, mark it dead and release it after an
 * RCU grace period.  The table's built-in default parms (and NULL) are
 * never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	/* Defer the final put until concurrent RCU readers finish. */
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1509
/* Final destructor, reached when the last parms reference is dropped. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1514
1515 static struct lock_class_key neigh_table_proxy_queue_class;
1516
1517 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1518
/* Initialize neighbour table @tbl and register it in neigh_tables[index].
 * Allocates per-cpu stats, the neighbour hash and the proxy hash, sets up
 * the periodic GC work and the proxy timer.  Allocation failures at boot
 * are fatal (panic), matching the other table-init paths.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash has 2^3 = 8 buckets; it grows on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1569
/* Tear down neighbour table @tbl: unregister it, stop GC work and the
 * proxy timer, flush all entries and free the hashes, proc entry and
 * per-cpu stats.  Complains if entries are still referenced.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* Free the hash after an RCU grace period; no new lookups can
	 * start since the table was unregistered above.
	 */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1596
1597 static struct neigh_table *neigh_find_table(int family)
1598 {
1599         struct neigh_table *tbl = NULL;
1600
1601         switch (family) {
1602         case AF_INET:
1603                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1604                 break;
1605         case AF_INET6:
1606                 tbl = neigh_tables[NEIGH_ND_TABLE];
1607                 break;
1608         case AF_DECnet:
1609                 tbl = neigh_tables[NEIGH_DN_TABLE];
1610                 break;
1611         }
1612
1613         return tbl;
1614 }
1615
/* RTM_DELNEIGH netlink handler: delete a proxy entry (NTF_PROXY) or
 * force an ordinary neighbour entry to NUD_FAILED via an administrative
 * neigh_update().  Returns 0 or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* The destination attribute must carry a full-size key. */
	if (nla_len(dst_attr) < tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	/* Admin override: push the entry to FAILED regardless of state. */
	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN);
	neigh_release(neigh);

out:
	return err;
}
1672
/* RTM_NEWNEIGH handler: create or update a neighbour entry (or a proxy
 * entry when NTF_PROXY is set) from a netlink request.
 *
 * Runs under RTNL (asserted below).  Returns 0 on success or a negative
 * errno (-EINVAL, -ENODEV, -EAFNOSUPPORT, -ENOBUFS, -ENOENT, -EEXIST, or
 * whatever neigh_update() reports).
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	/* Default update policy: administrative change that may override
	 * the current link-layer address; OVERRIDE is dropped below unless
	 * the caller asked for NLM_F_REPLACE on an existing entry.
	 */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* If a link-layer address is supplied it must be at least
		 * as long as the device's hardware address.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* The destination key must cover the table's key length. */
	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		/* Proxy entries live in the separate pneigh hash, not the
		 * main neighbour table; creat=1 makes the lookup create
		 * the entry on demand.
		 */
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	/* A regular (non-proxy) entry always needs a device. */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE, do not override the lladdr of an
		 * entry that already exists.
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE: just kick resolution as if the entry were used
		 * for output; do not force a state change.
		 */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
	neigh_release(neigh);	/* drop the ref taken by lookup/create */

out:
	return err;
}
1763
1764 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1765 {
1766         struct nlattr *nest;
1767
1768         nest = nla_nest_start(skb, NDTA_PARMS);
1769         if (nest == NULL)
1770                 return -ENOBUFS;
1771
1772         if ((parms->dev &&
1773              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1774             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1775             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1776                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1777             /* approximative value for deprecated QUEUE_LEN (in packets) */
1778             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1779                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1780             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1781             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1782             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1783                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1784             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1785                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1786             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1787                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1788             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1789                           NDTPA_PAD) ||
1790             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1791                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1792             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1793                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1794             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1795                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1796             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1797                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1798             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1799                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1800             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1801                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1802             nla_put_msecs(skb, NDTPA_LOCKTIME,
1803                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1804                 goto nla_put_failure;
1805         return nla_nest_end(skb, nest);
1806
1807 nla_put_failure:
1808         nla_nest_cancel(skb, nest);
1809         return -EMSGSIZE;
1810 }
1811
/* Build one RTM_NEWNEIGHTBL message describing a neighbour table:
 * table-wide tunables, a config snapshot (NDTA_CONFIG), aggregated
 * per-CPU statistics (NDTA_STATS) and the table's default parms
 * (NDTA_PARMS).  Returns 0 or -EMSGSIZE if the skb ran out of room.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold the table lock so all dumped values are mutually
	 * consistent; released on both the success and failure paths.
	 */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed; take a short
		 * RCU-bh read section just to snapshot rnd/mask.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-CPU counters into one ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* tbl->parms is the device-independent default parms set. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1900
1901 static int neightbl_fill_param_info(struct sk_buff *skb,
1902                                     struct neigh_table *tbl,
1903                                     struct neigh_parms *parms,
1904                                     u32 pid, u32 seq, int type,
1905                                     unsigned int flags)
1906 {
1907         struct ndtmsg *ndtmsg;
1908         struct nlmsghdr *nlh;
1909
1910         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1911         if (nlh == NULL)
1912                 return -EMSGSIZE;
1913
1914         ndtmsg = nlmsg_data(nlh);
1915
1916         read_lock_bh(&tbl->lock);
1917         ndtmsg->ndtm_family = tbl->family;
1918         ndtmsg->ndtm_pad1   = 0;
1919         ndtmsg->ndtm_pad2   = 0;
1920
1921         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1922             neightbl_fill_parms(skb, parms) < 0)
1923                 goto errout;
1924
1925         read_unlock_bh(&tbl->lock);
1926         nlmsg_end(skb, nlh);
1927         return 0;
1928 errout:
1929         read_unlock_bh(&tbl->lock);
1930         nlmsg_cancel(skb, nlh);
1931         return -EMSGSIZE;
1932 }
1933
/* Netlink attribute policy for RTM_SETNEIGHTBL top-level attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1942
/* Policy for the attributes nested inside NDTA_PARMS.  Time values are
 * carried as 64-bit milliseconds (NLA_U64), counters as NLA_U32.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1959
/* RTM_SETNEIGHTBL handler: update table-wide thresholds/GC interval and,
 * via a nested NDTA_PARMS attribute, one per-device (or default) parms
 * instance selected by NDTPA_IFINDEX.
 *
 * The table is selected by name (NDTA_NAME), optionally narrowed by
 * family.  Returns 0 or a negative errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Find the table matching the requested name (and family). */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every attribute that was supplied. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count: converted to an
				 * approximate byte limit.
				 */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* Let interested parties (e.g. switchdev
				 * drivers) know the interval changed.
				 */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide GC settings may only be changed from init_net. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2119
/* RTM_GETNEIGHTBL dump callback: for every matching table emit one table
 * message plus one message per per-device parms instance in this netns.
 *
 * Resumption state across dump calls lives in cb->args[0] (table index)
 * and cb->args[1] (parms index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Skip tbl->parms itself (already covered by the table
		 * message above); walk only the per-device instances.
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	/* Save position so the next dump call resumes here. */
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2172
/* Fill one RTM_NEWNEIGH message for a neighbour entry.
 *
 * The volatile fields (state, lladdr, cacheinfo timestamps, refcount)
 * are snapshotted under neigh->lock so the message is self-consistent.
 * Returns 0 or -EMSGSIZE.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* Copy the hw address under the lock; it may be updated
		 * concurrently by neigh_update().
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Ages are reported relative to now, in clock_t units. */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2225
2226 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2227                             u32 pid, u32 seq, int type, unsigned int flags,
2228                             struct neigh_table *tbl)
2229 {
2230         struct nlmsghdr *nlh;
2231         struct ndmsg *ndm;
2232
2233         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2234         if (nlh == NULL)
2235                 return -EMSGSIZE;
2236
2237         ndm = nlmsg_data(nlh);
2238         ndm->ndm_family  = tbl->family;
2239         ndm->ndm_pad1    = 0;
2240         ndm->ndm_pad2    = 0;
2241         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2242         ndm->ndm_type    = RTN_UNICAST;
2243         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2244         ndm->ndm_state   = NUD_NONE;
2245
2246         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2247                 goto nla_put_failure;
2248
2249         nlmsg_end(skb, nlh);
2250         return 0;
2251
2252 nla_put_failure:
2253         nlmsg_cancel(skb, nlh);
2254         return -EMSGSIZE;
2255 }
2256
/* Broadcast a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2262
2263 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2264 {
2265         struct net_device *master;
2266
2267         if (!master_idx)
2268                 return false;
2269
2270         master = netdev_master_upper_dev_get(dev);
2271         if (!master || master->ifindex != master_idx)
2272                 return true;
2273
2274         return false;
2275 }
2276
2277 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2278 {
2279         if (filter_idx && dev->ifindex != filter_idx)
2280                 return true;
2281
2282         return false;
2283 }
2284
/* Dump all neighbour entries of one table into a netlink dump skb,
 * honouring optional NDA_IFINDEX / NDA_MASTER filters in the request.
 *
 * Iterates the RCU-protected hash under rcu_read_lock_bh(); resumption
 * state lives in cb->args[1] (bucket) and cb->args[2] (index within
 * bucket).  Returns skb->len, or -1 when the skb filled up mid-dump.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* Parse optional filters; a parse failure just means no
	 * filtering (err is deliberately not propagated).
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Only the resumed bucket keeps its skip index. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (neigh_ifindex_filtered(n->dev, filter_idx))
				continue;
			if (neigh_master_filtered(n->dev, filter_master_idx))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Record position for the next dump call. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2350
/* Dump all proxy entries of one table into a netlink dump skb.
 *
 * The pneigh hash is protected by tbl->lock (no RCU here); resumption
 * state lives in cb->args[3] (bucket) and cb->args[4] (index within
 * bucket).  Returns skb->len, or -1 when the skb filled up mid-dump.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* Only the resumed bucket keeps its skip index. */
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (pneigh_net(n) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	/* Record position for the next dump call. */
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2390
/* RTM_GETNEIGH dump entry point: walk all neighbour tables matching the
 * requested family and dispatch to the proxy or regular table dumper.
 * cb->args[0] holds the table index for resumption; args[1..] belong to
 * the per-table dumpers and are reset when moving to the next table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: clear the per-table dump state. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2430
/* Invoke @cb on every neighbour entry of @tbl.
 *
 * Walks the RCU-protected hash under rcu_read_lock_bh() and also takes
 * tbl->lock as a reader so the hash cannot be resized mid-walk; @cb must
 * therefore not sleep or take locks that conflict with tbl->lock.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2452
2453 /* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every hash chain and let @cb decide, per entry, whether the entry
 * should be unlinked and released (@cb returns non-zero) or kept.
 * Caller must hold tbl->lock as a writer with BH disabled (see comment
 * above); that is what makes the unlink via *np safe.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                /* np always points at the link that references n, so an
                 * unlink is a single pointer update.
                 */
                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* Release outside n->lock; drops the table's ref. */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2487
/* Transmit @skb to @addr on @dev via the neighbour table selected by
 * @index.  For real tables (index < NEIGH_NR_TABLES) the neighbour is
 * looked up (created on demand) and its output method is used; for
 * NEIGH_LINK_TABLE the link-layer header is built directly and the skb
 * queued.  On the error paths below that reach out_kfree_skb the skb is
 * consumed.
 */
int neigh_xmit(int index, struct net_device *dev,
               const void *addr, struct sk_buff *skb)
{
        int err = -EAFNOSUPPORT;
        if (likely(index < NEIGH_NR_TABLES)) {
                struct neigh_table *tbl;
                struct neighbour *neigh;

                tbl = neigh_tables[index];
                if (!tbl)
                        goto out; /* NOTE(review): skb is not freed on this
                                   * path, unlike out_kfree_skb below —
                                   * confirm callers treat the skb as
                                   * consumed on all errors.
                                   */
                rcu_read_lock_bh();
                if (index == NEIGH_ARP_TABLE) {
                        /* IPv4 fast path: key is the 32-bit address. */
                        u32 key = *((u32 *)addr);

                        neigh = __ipv4_neigh_lookup_noref(dev, key);
                } else {
                        neigh = __neigh_lookup_noref(tbl, addr, dev);
                }
                if (!neigh)
                        neigh = __neigh_create(tbl, addr, dev, false);
                err = PTR_ERR(neigh);
                if (IS_ERR(neigh)) {
                        rcu_read_unlock_bh();
                        goto out_kfree_skb;
                }
                err = neigh->output(neigh, skb);
                rcu_read_unlock_bh();
        }
        else if (index == NEIGH_LINK_TABLE) {
                /* No resolution needed: build the hard header in place. */
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      addr, NULL, skb->len);
                if (err < 0)
                        goto out_kfree_skb;
                err = dev_queue_xmit(skb);
        }
out:
        return err;
out_kfree_skb:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2531
2532 #ifdef CONFIG_PROC_FS
2533
/* Return the first neighbour entry visible to this seq_file iteration:
 * same netns, optionally skipping NUD_NOARP entries, honouring a
 * protocol-specific sub-iterator if one is installed.  Records the
 * bucket reached in state->bucket.  Caller holds rcu_read_lock_bh()
 * and tbl->lock (taken in neigh_seq_start()).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        int bucket = state->bucket;

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                loff_t fakep = 0;
                                void *v;

                                /* Let the protocol filter/position within
                                 * this entry; NULL means skip it.
                                 */
                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
2572
/* Advance the seq_file iteration to the neighbour after @n, applying the
 * same netns / NOARP / sub-iterator filtering as neigh_get_first() and
 * crossing hash buckets as needed.  When @pos is non-NULL it is
 * decremented once per entry consumed (used by neigh_get_idx()).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        if (state->neigh_sub_iter) {
                /* The sub-iterator may still have positions within n. */
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                /* Chain exhausted: move on to the next hash bucket. */
                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2620
2621 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2622 {
2623         struct neighbour *n = neigh_get_first(seq);
2624
2625         if (n) {
2626                 --(*pos);
2627                 while (*pos) {
2628                         n = neigh_get_next(seq, n, pos);
2629                         if (!n)
2630                                 break;
2631                 }
2632         }
2633         return *pos ? NULL : n;
2634 }
2635
2636 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2637 {
2638         struct neigh_seq_state *state = seq->private;
2639         struct net *net = seq_file_net(seq);
2640         struct neigh_table *tbl = state->tbl;
2641         struct pneigh_entry *pn = NULL;
2642         int bucket = state->bucket;
2643
2644         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2645         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2646                 pn = tbl->phash_buckets[bucket];
2647                 while (pn && !net_eq(pneigh_net(pn), net))
2648                         pn = pn->next;
2649                 if (pn)
2650                         break;
2651         }
2652         state->bucket = bucket;
2653
2654         return pn;
2655 }
2656
2657 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2658                                             struct pneigh_entry *pn,
2659                                             loff_t *pos)
2660 {
2661         struct neigh_seq_state *state = seq->private;
2662         struct net *net = seq_file_net(seq);
2663         struct neigh_table *tbl = state->tbl;
2664
2665         do {
2666                 pn = pn->next;
2667         } while (pn && !net_eq(pneigh_net(pn), net));
2668
2669         while (!pn) {
2670                 if (++state->bucket > PNEIGH_HASHMASK)
2671                         break;
2672                 pn = tbl->phash_buckets[state->bucket];
2673                 while (pn && !net_eq(pneigh_net(pn), net))
2674                         pn = pn->next;
2675                 if (pn)
2676                         break;
2677         }
2678
2679         if (pn && pos)
2680                 --(*pos);
2681
2682         return pn;
2683 }
2684
2685 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2686 {
2687         struct pneigh_entry *pn = pneigh_get_first(seq);
2688
2689         if (pn) {
2690                 --(*pos);
2691                 while (*pos) {
2692                         pn = pneigh_get_next(seq, pn, pos);
2693                         if (!pn)
2694                                 break;
2695                 }
2696         }
2697         return *pos ? NULL : pn;
2698 }
2699
2700 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2701 {
2702         struct neigh_seq_state *state = seq->private;
2703         void *rc;
2704         loff_t idxpos = *pos;
2705
2706         rc = neigh_get_idx(seq, &idxpos);
2707         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2708                 rc = pneigh_get_idx(seq, &idxpos);
2709
2710         return rc;
2711 }
2712
/* seq_file ->start for neighbour /proc files.  Takes rcu_read_lock_bh()
 * and tbl->lock (reader); both are held across the whole iteration and
 * released in neigh_seq_stop().  Returns SEQ_START_TOKEN for the header
 * row, or the entry at *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(tbl->lock)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        /* IS_PNEIGH is iteration-internal state; never taken from caller. */
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);
        read_lock(&tbl->lock);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2730
/* seq_file ->next: advance within the current phase, falling through
 * from the neighbour phase to the pneigh phase when the former is
 * exhausted (unless NEIGH_SEQ_NEIGH_ONLY is set).
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                /* Neighbour phase done: switch to proxy entries. */
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2757
/* seq_file ->stop: drop the locks acquired in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(tbl->lock)
        __releases(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;
        struct neigh_table *tbl = state->tbl;

        read_unlock(&tbl->lock);
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2769
2770 /* statistics via seq_file */
2771
2772 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2773 {
2774         struct neigh_table *tbl = seq->private;
2775         int cpu;
2776
2777         if (*pos == 0)
2778                 return SEQ_START_TOKEN;
2779
2780         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2781                 if (!cpu_possible(cpu))
2782                         continue;
2783                 *pos = cpu+1;
2784                 return per_cpu_ptr(tbl->stats, cpu);
2785         }
2786         return NULL;
2787 }
2788
2789 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2790 {
2791         struct neigh_table *tbl = seq->private;
2792         int cpu;
2793
2794         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2795                 if (!cpu_possible(cpu))
2796                         continue;
2797                 *pos = cpu+1;
2798                 return per_cpu_ptr(tbl->stats, cpu);
2799         }
2800         (*pos)++;
2801         return NULL;
2802 }
2803
/* ->start takes no locks, so there is nothing to release here. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2808
/* Emit one row of /proc/net/stat/<table>: the column header for
 * SEQ_START_TOKEN, otherwise the counters of one CPU (v is that CPU's
 * neigh_statistics).  Note the first column, tbl->entries, is a
 * table-wide value repeated on every per-CPU row.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = seq->private;
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }

        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards,
                   st->table_fulls
                   );

        return 0;
}
2843
/* seq_file operations for the per-table statistics files. */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2850
2851 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2852 {
2853         int ret = seq_open(file, &neigh_stat_seq_ops);
2854
2855         if (!ret) {
2856                 struct seq_file *sf = file->private_data;
2857                 sf->private = PDE_DATA(inode);
2858         }
2859         return ret;
2860 };
2861
/* file_operations backing /proc/net/stat/<table>. */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2869
2870 #endif /* CONFIG_PROC_FS */
2871
/* Worst-case netlink message size for one neighbour notification;
 * must stay in sync with the attributes neigh_fill_info() emits
 * (an -EMSGSIZE there indicates this estimate is wrong).
 */
static inline size_t neigh_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ndmsg))
               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
               + nla_total_size(sizeof(struct nda_cacheinfo))
               + nla_total_size(4); /* NDA_PROBES */
}
2880
/* Broadcast a netlink notification about neighbour @n to RTNLGRP_NEIGH
 * listeners in its netns.  Allocation is GFP_ATOMIC since this may run
 * from timer/softirq context; on failure the error is recorded on the
 * group's socket via rtnl_set_sk_err().
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
        struct net *net = dev_net(n->dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
                goto errout;

        err = neigh_fill_info(skb, n, 0, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2904
/* Ask userspace resolvers for help with @n by multicasting an
 * RTM_GETNEIGH request (see the app_probes / NEIGH_VAR_APP_PROBES
 * machinery that calls this).
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2910
2911 #ifdef CONFIG_SYSCTL
2912 static int zero;
2913 static int int_max = INT_MAX;
2914 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2915
/* sysctl handler for the legacy "unres_qlen" knob.  The value is stored
 * in bytes (QUEUE_LEN_BYTES) but exposed to userspace as a packet count,
 * converting with SKB_TRUESIZE(ETH_FRAME_LEN) in both directions and
 * clamping writes to [0, unres_qlen_max].
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int size, ret;
        struct ctl_table tmp = *ctl;

        tmp.extra1 = &zero;
        tmp.extra2 = &unres_qlen_max;
        tmp.data = &size;

        /* bytes -> packets for presentation */
        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

        if (write && !ret)
                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
        return ret;
}
2933
2934 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2935                                                    int family)
2936 {
2937         switch (family) {
2938         case AF_INET:
2939                 return __in_dev_arp_parms_get_rcu(dev);
2940         case AF_INET6:
2941                 return __in6_dev_nd_parms_get_rcu(dev);
2942         }
2943         return NULL;
2944 }
2945
/* Propagate a changed default parameter to every device in @net whose
 * own parms have not been explicitly overridden for @index (tracked by
 * the data_state bitmap set in neigh_proc_update()).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
                                  int index)
{
        struct net_device *dev;
        int family = neigh_parms_family(p);

        rcu_read_lock();
        for_each_netdev_rcu(net, dev) {
                struct neigh_parms *dst_p =
                                neigh_get_dev_parms_rcu(dev, family);

                /* Only devices still tracking the default get updated. */
                if (dst_p && !test_bit(index, dst_p->data_state))
                        dst_p->data[index] = p->data[index];
        }
        rcu_read_unlock();
}
2962
/* Common post-write bookkeeping for the per-parms sysctl handlers:
 * mark the parameter as explicitly set, fire the netevent for probe
 * delay changes, and propagate table-default writes (ctl->extra1 ==
 * NULL dev) to all devices.  No-op for reads.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
        struct net_device *dev = ctl->extra1;
        struct neigh_parms *p = ctl->extra2;
        struct net *net = neigh_parms_net(p);
        /* Recover the NEIGH_VAR_* index from the data pointer offset
         * set up in neigh_sysctl_register().
         */
        int index = (int *) ctl->data - p->data;

        if (!write)
                return;

        set_bit(index, p->data_state);
        if (index == NEIGH_VAR_DELAY_PROBE_TIME)
                call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
        if (!dev) /* NULL dev means this is default value */
                neigh_copy_dflt_parms(net, p, index);
}
2979
2980 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2981                                            void __user *buffer,
2982                                            size_t *lenp, loff_t *ppos)
2983 {
2984         struct ctl_table tmp = *ctl;
2985         int ret;
2986
2987         tmp.extra1 = &zero;
2988         tmp.extra2 = &int_max;
2989
2990         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2991         neigh_proc_update(ctl, write);
2992         return ret;
2993 }
2994
2995 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2996                         void __user *buffer, size_t *lenp, loff_t *ppos)
2997 {
2998         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2999
3000         neigh_proc_update(ctl, write);
3001         return ret;
3002 }
3003 EXPORT_SYMBOL(neigh_proc_dointvec);
3004
3005 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3006                                 void __user *buffer,
3007                                 size_t *lenp, loff_t *ppos)
3008 {
3009         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3010
3011         neigh_proc_update(ctl, write);
3012         return ret;
3013 }
3014 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3015
3016 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3017                                               void __user *buffer,
3018                                               size_t *lenp, loff_t *ppos)
3019 {
3020         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3021
3022         neigh_proc_update(ctl, write);
3023         return ret;
3024 }
3025
3026 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3027                                    void __user *buffer,
3028                                    size_t *lenp, loff_t *ppos)
3029 {
3030         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3031
3032         neigh_proc_update(ctl, write);
3033         return ret;
3034 }
3035 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3036
3037 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3038                                           void __user *buffer,
3039                                           size_t *lenp, loff_t *ppos)
3040 {
3041         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3042
3043         neigh_proc_update(ctl, write);
3044         return ret;
3045 }
3046
/* Handler shared by "base_reachable_time" (jiffies) and
 * "base_reachable_time_ms" (milliseconds), dispatching on procname.
 * On a successful write it also refreshes p->reachable_time so the new
 * value takes effect immediately.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
                                          void __user *buffer,
                                          size_t *lenp, loff_t *ppos)
{
        struct neigh_parms *p = ctl->extra2;
        int ret;

        if (strcmp(ctl->procname, "base_reachable_time") == 0)
                ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
        else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
                ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
        else
                ret = -1;

        if (write && ret == 0) {
                /* update reachable_time as well, otherwise, the change will
                 * only be effective after the next time neigh_periodic_work
                 * decides to recompute it
                 */
                p->reachable_time =
                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }
        return ret;
}
3071
/* Offset of data[index] within struct neigh_parms, encoded as a fake
 * pointer from NULL; neigh_sysctl_register() later adds the real parms
 * address to turn it into a usable .data pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)  \
        (&((struct neigh_parms *) 0)->data[index])

/* One ctl_table entry for NEIGH_VAR_<attr>, reading/writing the
 * NEIGH_VAR_<data_attr> slot (usually the same; the *_REUSED variants
 * below alias a different backing slot).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
        [NEIGH_VAR_ ## attr] = { \
                .procname       = name, \
                .data           = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
        }

/* Convenience wrappers selecting the proc handler per value type. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Legacy names that expose an existing slot under a second unit. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3101
/* Template sysctl table, kmemdup()'d per registration by
 * neigh_sysctl_register(), which patches in the real data pointers.
 * Entries up to NEIGH_VAR_GC_INTERVAL are per-parms; the gc_* entries
 * are table-wide and only kept for the "default" directory.
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
                NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                {},
        },
};
3156
/* Register the neigh sysctl directory for @p under
 * net/<ipv4|ipv6>/neigh/<dev-or-"default">.  A copy of the template is
 * patched so each entry's .data points into p->data[]; per-device
 * registrations drop the table-wide gc_* entries, and the "default"
 * registration wires them to the table's gc fields.  @handler, when
 * given, overrides the handlers of the four *time* entries.
 * Returns 0 or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          proc_handler *handler)
{
        int i;
        struct neigh_sysctl_table *t;
        const char *dev_name_source;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
        char *p_name;

        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
                /* .data holds an offset (NEIGH_PARMS_DATA_OFFSET); add the
                 * real parms address to finish the pointer.
                 */
                t->neigh_vars[i].data += (long) p;
                t->neigh_vars[i].extra1 = dev;
                t->neigh_vars[i].extra2 = p;
        }

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                struct neigh_table *tbl = p->tbl;
                dev_name_source = "default";
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
        }

        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
        } else {
                /* Those handlers will update p->reachable_time after
                 * base_reachable_time(_ms) is set to ensure the new timer starts being
                 * applied after the next neighbour update instead of waiting for
                 * neigh_periodic_work to update its value (can be multiple minutes)
                 * So any handler that replaces them should do this as well
                 */
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
                        neigh_proc_base_reachable_time;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
                        neigh_proc_base_reachable_time;
        }

        /* Don't export sysctls to unprivileged users */
        if (neigh_parms_net(p)->user_ns != &init_user_ns)
                t->neigh_vars[0].procname = NULL;

        switch (neigh_parms_family(p)) {
        case AF_INET:
              p_name = "ipv4";
              break;
        case AF_INET6:
              p_name = "ipv6";
              break;
        default:
              BUG();
        }

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3245
3246 void neigh_sysctl_unregister(struct neigh_parms *p)
3247 {
3248         if (p->sysctl_table) {
3249                 struct neigh_sysctl_table *t = p->sysctl_table;
3250                 p->sysctl_table = NULL;
3251                 unregister_net_sysctl_table(t->sysctl_header);
3252                 kfree(t);
3253         }
3254 }
3255 EXPORT_SYMBOL(neigh_sysctl_unregister);
3256
3257 #endif  /* CONFIG_SYSCTL */
3258
/* Register the PF_UNSPEC rtnetlink handlers for neighbour and
 * neighbour-table messages at subsys init time.
 */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      NULL);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

        return 0;
}

subsys_initcall(neigh_init);
3273