GNU Linux-libre 4.4.297-gnu1
[releases.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/arp.h>
35 #include <net/dst.h>
36 #include <net/sock.h>
37 #include <net/netevent.h>
38 #include <net/netlink.h>
39 #include <linux/rtnetlink.h>
40 #include <linux/random.h>
41 #include <linux/string.h>
42 #include <linux/log2.h>
43 #include <linux/inetdevice.h>
44 #include <net/addrconf.h>
45
46 #define DEBUG
47 #define NEIGH_DEBUG 1
48 #define neigh_dbg(level, fmt, ...)              \
49 do {                                            \
50         if (level <= NEIGH_DEBUG)               \
51                 pr_debug(fmt, ##__VA_ARGS__);   \
52 } while (0)
53
54 #define PNEIGH_HASHMASK         0xF
55
56 static void neigh_timer_handler(unsigned long arg);
57 static void __neigh_notify(struct neighbour *n, int type, int flags);
58 static void neigh_update_notify(struct neighbour *neigh);
59 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
60                                     struct net_device *dev);
61
62 #ifdef CONFIG_PROC_FS
63 static const struct file_operations neigh_stat_seq_fops;
64 #endif
65
66 /*
67    Neighbour hash table buckets are protected with rwlock tbl->lock.
68
69    - All the scans/updates to hash buckets MUST be made under this lock.
70    - NOTHING clever should be made under this lock: no callbacks
71      to protocol backends, no attempts to send something to network.
72      It will result in deadlocks, if backend/driver wants to use neighbour
73      cache.
74    - If the entry requires some non-trivial actions, increase
75      its reference count and release table lock.
76
77    Neighbour entries are protected:
78    - with reference count.
79    - with rwlock neigh->lock
80
81    Reference count prevents destruction.
82
83    neigh->lock mainly serializes ll address data and its validity state.
84    However, the same lock is used to protect another entry fields:
85     - timer
86     - resolution queue
87
88    Again, nothing clever shall be made under neigh->lock,
89    the most complicated procedure, which we allow is dev->hard_header.
90    It is supposed, that dev->hard_header is simplistic and does
91    not make callbacks to neighbour tables.
92  */
93
/* Fallback output handler installed on dead/stray entries: drop the
 * packet and report the neighbour as unusable.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
99
/* Final teardown for an entry that has been unlinked from its table:
 * run the per-protocol cleanup hook (if any), emit an RTM_DELNEIGH
 * netlink notification, and drop the table's reference on the entry.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
108
/* Pick a pseudo-random reachability timeout uniformly distributed in
 * [base/2, 3*base/2).  This matches the default IPv6 (RFC 4861) spread
 * and is deliberately not tunable.  A base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half;

	if (!base)
		return 0;

	half = base >> 1;
	return half + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
121
/* Synchronously sweep the whole hash table and evict every entry that
 * is unreferenced (refcnt == 1, i.e. only the table holds it) and not
 * NUD_PERMANENT.  Called from neigh_alloc() when the table exceeds its
 * gc thresholds.  Returns 1 if at least one entry was evicted, else 0.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink via rcu_assign_pointer so lockless
				 * readers still see a consistent chain.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Remembered so neigh_alloc() can rate-limit forced GC runs. */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
167
/* Arm the entry's state-machine timer, taking a reference that the
 * timer path later releases.  mod_timer() returning nonzero means the
 * timer was already pending - a double add indicates broken state
 * handling, so complain loudly with a stack trace.
 */
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
177
178 static int neigh_del_timer(struct neighbour *n)
179 {
180         if ((n->nud_state & NUD_IN_TIMER) &&
181             del_timer(&n->timer)) {
182                 neigh_release(n);
183                 return 1;
184         }
185         return 0;
186 }
187
188 static void pneigh_queue_purge(struct sk_buff_head *list)
189 {
190         struct sk_buff *skb;
191
192         while ((skb = skb_dequeue(list)) != NULL) {
193                 dev_put(skb->dev);
194                 kfree_skb(skb);
195         }
196 }
197
/* Unlink every entry belonging to @dev (or all entries when @dev is
 * NULL) from the hash table.  Caller must hold tbl->lock for writing.
 * Entries still referenced elsewhere cannot be freed here; they are
 * disarmed instead - queue purged, output redirected to
 * neigh_blackhole, state downgraded - and die when the last reference
 * is dropped.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* Unlink first, then disarm under n->lock. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
247
/* The device's link-layer address changed: flush all of its cached
 * entries under the table lock so stale hardware addresses are never
 * used again.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
255
/* Device is going down: flush its neighbour entries, drop its proxy
 * entries, then quiesce the proxy timer and queue.  Note that
 * pneigh_ifdown_and_unlock() releases tbl->lock itself, so the proxy
 * destructors run without the table lock held.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
267
/* Allocate and minimally initialise a new neighbour entry for @tbl.
 * Runs synchronous garbage collection when the table is over
 * gc_thresh2 (and the last flush is older than 5s) or gc_thresh3, and
 * returns NULL when the table stays full or memory runs out.  The new
 * entry starts with refcnt == 1 and dead == 1: it only becomes live
 * once __neigh_create() links it into the hash table.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Account the entry up front; undone via out_entries on failure. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

	/* entry_size covers the protocol key; neigh_priv_len is extra
	 * per-device private room.
	 */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not yet in the hash table */
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
312
/* Fill *x with random bits for the hash functions; the low bit is
 * forced on so the value is odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
318
/* Allocate a hash table with 2^shift buckets, zeroed, plus fresh hash
 * randomisers.  Small bucket arrays come from the slab; larger ones
 * fall back to whole pages, which kmemleak does not track by itself
 * and so must be registered by hand.  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
347
/* RCU callback freeing a retired hash table once no reader can still
 * see it.  Mirrors the slab-vs-pages split in neigh_hash_alloc(),
 * including the manual kmemleak bookkeeping for the page case.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}
364
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry into it.  Caller must hold tbl->lock for writing.  On
 * allocation failure the old table is kept and returned unchanged.
 * The old table is freed via call_rcu() so lockless readers still
 * traversing it remain safe.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* Bucket index is the top hash_shift bits. */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket's chain. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
405
/* Look up the entry for @pkey on @dev, taking a reference on success.
 * Runs locklessly under rcu_read_lock_bh(); an entry whose refcount
 * has already dropped to zero is treated as missing.  Returns the
 * referenced entry or NULL.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!atomic_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
425
/* Like neigh_lookup() but device-agnostic: find (and reference) the
 * first entry in the bucket whose primary key matches @pkey and whose
 * device lives in namespace @net, whatever that device is.  The hash
 * is computed with a NULL device for the same reason.  Returns the
 * referenced entry or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
456
/* Create - or find, if a concurrent creator won - the entry for
 * @pkey on @dev.  The entry is allocated and the protocol constructor,
 * ndo_neigh_construct and parms->neigh_setup hooks all run without the
 * table lock; insertion then happens under tbl->lock, where a
 * duplicate inserted meanwhile is detected and returned instead (the
 * new entry being released).  With @want_ref a reference is taken on
 * whatever entry is returned.  Returns ERR_PTR(-ENOBUFS) on allocation
 * failure, the ERR_PTR of a failing constructor hook, or
 * ERR_PTR(-EINVAL) when n->parms died underneath us (device teardown).
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the fresh entry does not look
	 * recently confirmed.
	 */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Someone may have inserted the same key while we ran the
	 * constructors unlocked; prefer the existing entry.
	 */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
544
545 static u32 pneigh_hash(const void *pkey, int key_len)
546 {
547         u32 hash_val = *(u32 *)(pkey + key_len - 4);
548         hash_val ^= (hash_val >> 16);
549         hash_val ^= hash_val >> 8;
550         hash_val ^= hash_val >> 4;
551         hash_val &= PNEIGH_HASHMASK;
552         return hash_val;
553 }
554
555 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
556                                               struct net *net,
557                                               const void *pkey,
558                                               int key_len,
559                                               struct net_device *dev)
560 {
561         while (n) {
562                 if (!memcmp(n->key, pkey, key_len) &&
563                     net_eq(pneigh_net(n), net) &&
564                     (n->dev == dev || !n->dev))
565                         return n;
566                 n = n->next;
567         }
568         return NULL;
569 }
570
/* Bare proxy-entry lookup with no locking of its own; the caller is
 * expected to hold tbl->lock (compare pneigh_lookup(), which takes the
 * read lock around the same chain walk).  Returns the entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
581
/* Find the proxy entry for @pkey/@dev in @net.  With @creat set,
 * allocate, construct (tbl->pconstructor) and insert a new entry when
 * none exists; that path sleeps (GFP_KERNEL) and requires the RTNL.
 * Returns the entry, or NULL when not found / allocation or
 * constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* Creation is serialized by the RTNL, so the unlocked gap
	 * between lookup and insert cannot race another creator.
	 */
	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
626
627
/* Remove and free the proxy entry exactly matching @pkey, @dev and
 * @net (no device wildcard here, unlike lookup).  The pdestructor and
 * kfree run after tbl->lock has been dropped.  Returns 0 on success,
 * -ENOENT when nothing matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
653
/* Unlink every proxy entry for @dev (all entries when @dev is NULL).
 * The caller must hold tbl->lock for writing; this function RELEASES
 * it.  Entries are first collected on a private freelist under the
 * lock, then destructed and freed without it, so pdestructor callbacks
 * cannot deadlock against the table lock.  Always returns -ENOENT
 * (the caller ignores the value).
 */
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
684
685 static void neigh_parms_destroy(struct neigh_parms *parms);
686
/* Drop one reference on a parms block; the last put destroys it. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
692
/*
 *	Final destructor, invoked when the last reference is dropped.
 *	The neighbour must already be out of the table (dead != 0);
 *	destroying a live entry would corrupt the hash chains, so in
 *	that case we only warn and bail out.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A timer still armed here would mean an unaccounted reference. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
729
/* Neighbour state is suspicious (reachability no longer certain);
 * disable the fast path by routing output through the generic,
 * resolving ops->output handler.
 *
 * Called with neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}
741
/* Neighbour state is OK (link-layer address validated);
 * enable the fast path via ops->connected_output.
 *
 * Called with neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
753
/* Deferred-work garbage collector.  Re-randomizes each parms'
 * reachable_time every 300 seconds, then - only when the table holds
 * at least gc_thresh1 entries - sweeps the buckets, evicting
 * unreferenced entries that are NUD_FAILED or idle beyond
 * GC_STALETIME.  tbl->lock is dropped between buckets to bound
 * latency, and nht is re-read after re-taking it since the hash table
 * may have grown meanwhile.  Re-arms itself every
 * BASE_REACHABLE_TIME/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Permanent entries and entries with an armed
			 * timer are never reclaimed here.
			 */
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
834
835 static __inline__ int neigh_max_probes(struct neighbour *n)
836 {
837         struct neigh_parms *p = n->parms;
838         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
839                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
840                 NEIGH_VAR(p, MCAST_PROBES));
841 }
842
/* Entry has entered NUD_FAILED: count the resolution failure, report
 * every queued packet as unreachable, and flush the arp queue.  Called
 * and returns with neigh->lock write-held, but the lock is dropped
 * around error_report() because that callback may re-enter the
 * neighbour code (see comment below).
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
867
/* Emit one solicitation for @neigh and bump its probe counter.
 * Called with neigh->lock held for writing; the lock is released
 * before calling into ops->solicit() (see annotation), so the last
 * queued skb is cloned first to keep a stable packet for solicit()
 * even if the arp_queue changes meanwhile.
 */
static void neigh_probe(struct neighbour *neigh)
        __releases(neigh->lock)
{
        struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
        /* keep skb alive even if arp_queue overflows */
        if (skb)
                skb = skb_clone(skb, GFP_ATOMIC);
        write_unlock(&neigh->lock);
        if (neigh->ops->solicit)
                neigh->ops->solicit(neigh, skb);
        atomic_inc(&neigh->probes);
        kfree_skb(skb);
}
881
/* Called when a timer expires for a neighbour entry. */

/* Per-neighbour NUD state-machine timer.  Runs with a reference held on
 * @neigh (taken when the timer was armed; released at the bottom).
 * Advances nud_state, re-arms the timer while the new state is still a
 * timed one, emits probes for NUD_INCOMPLETE/NUD_PROBE and invalidates
 * the entry once the probe budget is exhausted.
 */
static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        /* Entry left all timed states before we ran; nothing to do. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        /* Recently confirmed: stay REACHABLE. */
                        neigh_dbg(2, "neigh %p is still alive\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used +
                                          NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        /* Used recently but unconfirmed: delay before probing. */
                        neigh_dbg(2, "neigh %p is delayed\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
                } else {
                        /* Idle and unconfirmed: demote to STALE. */
                        neigh_dbg(2, "neigh %p is suspected\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed +
                                   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        /* Confirmation arrived while delayed: REACHABLE again. */
                        neigh_dbg(2, "neigh %p is now reachable\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        /* Delay expired without confirmation: start probing. */
                        neigh_dbg(2, "neigh %p is probed\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        notify = 1;
                        next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
        }

        /* Probe budget exhausted: give up on this neighbour. */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
                goto out;
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm closer than HZ/2 to limit timer churn. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() drops neigh->lock for us. */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        /* Pairs with the hold taken when this timer run was scheduled. */
        neigh_release(neigh);
}
969
/* Slow path of neigh_event_send(): kick resolution for an entry that is
 * not immediately usable.  Starts probing for entries with no valid
 * state, moves NUD_STALE entries to NUD_DELAY, and queues @skb on the
 * arp_queue while the entry is NUD_INCOMPLETE.
 *
 * Returns 0 when the caller may transmit now, 1 when the skb was
 * queued for later (or dropped because resolution is impossible).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        /* Usable, or already being verified: let the caller transmit. */
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;
        if (neigh->dead)
                goto out_dead;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
                    NEIGH_VAR(neigh->parms, APP_PROBES)) {
                        unsigned long next, now = jiffies;

                        /* Begin resolution; pre-charge the probe counter
                         * with the unicast budget so multicast/app probes
                         * are used for initial resolution.
                         */
                        atomic_set(&neigh->probes,
                                   NEIGH_VAR(neigh->parms, UCAST_PROBES));
                        neigh_del_timer(neigh);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
                                         HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No probing configured at all: fail right away. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                neigh_dbg(2, "neigh %p is delayed\n", neigh);
                neigh_del_timer(neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh, jiffies +
                                NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Enforce the per-neighbour byte limit by dropping
                         * the oldest queued packets first.
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        /* neigh_probe() releases the lock itself (non-_bh variant);
         * BHs stay disabled until the explicit local_bh_enable() below.
         */
        if (immediate_probe)
                neigh_probe(neigh);
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;

out_dead:
        if (neigh->nud_state & NUD_STALE)
                goto out_unlock_bh;
        write_unlock_bh(&neigh->lock);
        kfree_skb(skb);
        return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1049
1050 static void neigh_update_hhs(struct neighbour *neigh)
1051 {
1052         struct hh_cache *hh;
1053         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1054                 = NULL;
1055
1056         if (neigh->dev->header_ops)
1057                 update = neigh->dev->header_ops->cache_update;
1058
1059         if (update) {
1060                 hh = &neigh->hh;
1061                 if (READ_ONCE(hh->hh_len)) {
1062                         write_seqlock_bh(&hh->hh_lock);
1063                         update(hh, neigh->dev, neigh->ha);
1064                         write_sequnlock_bh(&hh->hh_lock);
1065                 }
1066         }
1067 }
1068
1069
1070
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
                                lladdr instead of overriding it
                                if it is different.
                                It also allows retaining the current state
                                if lladdr is unchanged.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
                                to be a router.

   Caller MUST hold a reference count on the entry.
 */
1091
/* Generic neighbour state/address update (see flag description above).
 * Returns 0 on success, -EPERM for a non-admin update of a NOARP or
 * PERMANENT entry, -EINVAL when no address is supplied and none is
 * cached.  Caller must hold a reference on @neigh.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may touch NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;
        if (neigh->dead)
                goto out;

        if (!(new & NUD_VALID)) {
                /* Transition to an invalid state (NONE/FAILED/...). */
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        /* Update confirmed timestamp for neighbour entry after we
         * received ARP packet even if it doesn't change IP to MAC binding.
         */
        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the cached address but mark the
                                 * entry suspect instead of overriding.
                                 */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        /* Update timestamp only once we know we will make a change to the
         * neighbour entry. Otherwise we risk to move the locktime window with
         * noop updates and ignore relevant ARP updates.
         */
        if (new != old || lladdr != neigh->ha)
                neigh->updated = jiffies;

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_PROBE)
                        atomic_set(&neigh->probes, 0);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
                notify = 1;
        }

        if (lladdr != neigh->ha) {
                /* Publish the new hardware address under ha_lock, then
                 * refresh any cached hardware headers.
                 */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();

                        /* Why not just use 'neigh' as-is?  The problem is that
                         * things such as shaper, eql, and sch_teql can end up
                         * using alternative, different, neigh objects to output
                         * the packet in the output path.  So what we need to do
                         * here is re-lookup the top-level neigh in the path so
                         * we can reinject the packet there.
                         */
                        n2 = NULL;
                        if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
                                n2 = dst_neigh_lookup_skb(dst, skb);
                                if (n2)
                                        n1 = n2;
                        }
                        n1->output(n1, skb);
                        if (n2)
                                neigh_release(n2);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                __skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1266
1267 /* Update the neigh to listen temporarily for probe responses, even if it is
1268  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1269  */
1270 void __neigh_set_probe_once(struct neighbour *neigh)
1271 {
1272         if (neigh->dead)
1273                 return;
1274         neigh->updated = jiffies;
1275         if (!(neigh->nud_state & NUD_FAILED))
1276                 return;
1277         neigh->nud_state = NUD_INCOMPLETE;
1278         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1279         neigh_add_timer(neigh,
1280                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1281 }
1282 EXPORT_SYMBOL(__neigh_set_probe_once);
1283
1284 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1285                                  u8 *lladdr, void *saddr,
1286                                  struct net_device *dev)
1287 {
1288         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1289                                                  lladdr || !dev->addr_len);
1290         if (neigh)
1291                 neigh_update(neigh, lladdr, NUD_STALE,
1292                              NEIGH_UPDATE_F_OVERRIDE);
1293         return neigh;
1294 }
1295 EXPORT_SYMBOL(neigh_event_ns);
1296
/* Lazily initialize the cached hardware header for @n.
 * NOTE(review): a historical comment here claimed "called with
 * read_lock_bh(&n->lock)", but this version takes n->lock for writing
 * itself, so callers must not hold it -- verify against call sites.
 */
static void neigh_hh_init(struct neighbour *n)
{
        struct net_device *dev = n->dev;
        __be16 prot = n->tbl->protocol;
        struct hh_cache *hh = &n->hh;

        write_lock_bh(&n->lock);

        /* Only one thread can come in here and initialize the
         * hh_cache entry.
         */
        if (!hh->hh_len)
                dev->header_ops->cache(n, hh, prot);

        write_unlock_bh(&n->lock);
}
1314
/* Slow and careful. */

/* Output path for neighbours that may still need resolution: kick the
 * state machine via neigh_event_send(); if the entry is usable, build
 * the link-layer header under the ha_lock seqlock (retrying if the
 * address changes mid-copy) and hand the skb to the device queue.
 * Returns the dev_queue_xmit() result, 0 if the skb was queued for
 * resolution, or -EINVAL when header construction failed.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = 0;

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                /* Populate the hardware-header cache on first use. */
                if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
                        neigh_hh_init(neigh);

                do {
                        __skb_pull(skb, skb_network_offset(skb));
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = dev_queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1349
1350 /* As fast as possible without hh cache */
1351
1352 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1353 {
1354         struct net_device *dev = neigh->dev;
1355         unsigned int seq;
1356         int err;
1357
1358         do {
1359                 __skb_pull(skb, skb_network_offset(skb));
1360                 seq = read_seqbegin(&neigh->ha_lock);
1361                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1362                                       neigh->ha, NULL, skb->len);
1363         } while (read_seqretry(&neigh->ha_lock, seq));
1364
1365         if (err >= 0)
1366                 err = dev_queue_xmit(skb);
1367         else {
1368                 err = -EINVAL;
1369                 kfree_skb(skb);
1370         }
1371         return err;
1372 }
1373 EXPORT_SYMBOL(neigh_connected_output);
1374
/* Output path for devices needing no link-layer header resolution:
 * pass the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1380
/* Timer handler for the per-table proxy queue: replay every queued skb
 * whose scheduled time has arrived (via tbl->proxy_redo) and re-arm the
 * timer for the earliest remaining entry, if any.
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        /* Due: replay the request, or drop it if the
                         * device went down in the meantime.
                         */
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        /* Pairs with dev_hold() in pneigh_enqueue(). */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1414
1415 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1416                     struct sk_buff *skb)
1417 {
1418         unsigned long now = jiffies;
1419
1420         unsigned long sched_next = now + (prandom_u32() %
1421                                           NEIGH_VAR(p, PROXY_DELAY));
1422
1423         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1424                 kfree_skb(skb);
1425                 return;
1426         }
1427
1428         NEIGH_CB(skb)->sched_next = sched_next;
1429         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1430
1431         spin_lock(&tbl->proxy_queue.lock);
1432         if (del_timer(&tbl->proxy_timer)) {
1433                 if (time_before(tbl->proxy_timer.expires, sched_next))
1434                         sched_next = tbl->proxy_timer.expires;
1435         }
1436         skb_dst_drop(skb);
1437         dev_hold(skb->dev);
1438         __skb_queue_tail(&tbl->proxy_queue, skb);
1439         mod_timer(&tbl->proxy_timer, sched_next);
1440         spin_unlock(&tbl->proxy_queue.lock);
1441 }
1442 EXPORT_SYMBOL(pneigh_enqueue);
1443
1444 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1445                                                       struct net *net, int ifindex)
1446 {
1447         struct neigh_parms *p;
1448
1449         list_for_each_entry(p, &tbl->parms_list, list) {
1450                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1451                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1452                         return p;
1453         }
1454
1455         return NULL;
1456 }
1457
/* Allocate per-device neigh_parms for @dev by cloning the table
 * defaults, letting the driver adjust them via ndo_neigh_setup(), and
 * linking them into tbl->parms.list.  Takes a reference on @dev.
 * Returns the new parms (refcount 1) or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                p->reachable_time =
                                neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, net);
                p->sysctl_table = NULL;

                /* Give the driver a chance to veto or tune the parms. */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        dev_put(dev);
                        kfree(p);
                        return NULL;
                }

                write_lock_bh(&tbl->lock);
                list_add(&p->list, &tbl->parms.list);
                write_unlock_bh(&tbl->lock);

                neigh_parms_data_state_cleanall(p);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1491
/* RCU callback scheduled by neigh_parms_release(): drop the reference
 * once all lockless readers are guaranteed to have finished.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
        struct neigh_parms *parms =
                container_of(head, struct neigh_parms, rcu_head);

        neigh_parms_put(parms);
}
1499
/* Unlink @parms from its table and drop its reference after an RCU
 * grace period, so concurrent lockless readers can finish first.
 * The table's built-in default parms are never released.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        list_del(&parms->list);
        parms->dead = 1;
        write_unlock_bh(&tbl->lock);
        if (parms->dev)
                dev_put(parms->dev);
        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1513
/* Final destructor, reached when the last neigh_parms_put() drops the
 * refcount to zero.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        kfree(parms);
}
1518
1519 static struct lock_class_key neigh_table_proxy_queue_class;
1520
1521 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1522
/* One-time setup of a neighbour table: default parms, per-CPU stats,
 * /proc entry, neighbour and proxy hash tables, periodic GC work and
 * the proxy timer/queue.  Panics on allocation failure (runs during
 * protocol init).  Registers the table in neigh_tables[@index].
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        INIT_LIST_HEAD(&tbl->parms_list);
        list_add(&tbl->parms.list, &tbl->parms_list);
        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Initial neighbour hash: 2^3 buckets, grown on demand. */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        if (!tbl->entry_size)
                tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
                                        tbl->key_len, NEIGH_PRIV_ALIGN);
        else
                WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

        rwlock_init(&tbl->lock);
        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                        tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;

        neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1573
/* Tear down a neighbour table at protocol unload: unregister it, stop
 * GC and proxy processing, flush all entries and free the hash and
 * statistics storage.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
        neigh_tables[index] = NULL;
        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        if (atomic_read(&tbl->entries))
                pr_crit("neighbour leakage\n");

        /* Free the hash after readers are done with it. */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1600
1601 static struct neigh_table *neigh_find_table(int family)
1602 {
1603         struct neigh_table *tbl = NULL;
1604
1605         switch (family) {
1606         case AF_INET:
1607                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1608                 break;
1609         case AF_INET6:
1610                 tbl = neigh_tables[NEIGH_ND_TABLE];
1611                 break;
1612         case AF_DECnet:
1613                 tbl = neigh_tables[NEIGH_DN_TABLE];
1614                 break;
1615         }
1616
1617         return tbl;
1618 }
1619
/* RTM_DELNEIGH handler: resolve the target table and device from the
 * ndmsg, then either remove a proxy entry (NTF_PROXY) or force the
 * neighbour to NUD_FAILED via an administrative neigh_update().
 * Runs under RTNL; returns 0 or a negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct neighbour *neigh;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        tbl = neigh_find_table(ndm->ndm_family);
        if (tbl == NULL)
                return -EAFNOSUPPORT;

        /* The destination attribute must hold a full protocol key. */
        if (nla_len(dst_attr) < tbl->key_len)
                goto out;

        if (ndm->ndm_flags & NTF_PROXY) {
                err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                goto out;
        }

        if (dev == NULL)
                goto out;

        neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
        if (neigh == NULL) {
                err = -ENOENT;
                goto out;
        }

        /* ADMIN allows clearing even NOARP/PERMANENT entries. */
        err = neigh_update(neigh, NULL, NUD_FAILED,
                           NEIGH_UPDATE_F_OVERRIDE |
                           NEIGH_UPDATE_F_ADMIN);
        neigh_release(neigh);

out:
        return err;
}
1676
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry.
 * Honours NLM_F_CREATE / NLM_F_EXCL / NLM_F_REPLACE semantics.  Runs
 * under RTNL.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A link-layer address, if given, must fit the device. */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < tbl->key_len)
		goto out;
	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		/* Creation flag set: pneigh_lookup() will allocate the
		 * entry if it does not exist yet.
		 */
		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			err = 0;
		}
		goto out;
	}

	/* Non-proxy entries are always bound to a device. */
	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		neigh = __neigh_lookup_errno(tbl, dst, dev);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		/* Without NLM_F_REPLACE an existing entry is only
		 * refreshed, not forcibly overridden.
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~NEIGH_UPDATE_F_OVERRIDE;
	}

	if (ndm->ndm_flags & NTF_USE) {
		/* NTF_USE just marks the entry used and triggers
		 * resolution; no state/lladdr change is applied here.
		 */
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
	neigh_release(neigh);

out:
	return err;
}
1767
/* Emit one NDTA_PARMS nested attribute describing a neigh_parms instance.
 * Returns the skb length consumed on success, -ENOBUFS if the nest could
 * not be started, or -EMSGSIZE if any attribute did not fit (the partial
 * nest is cancelled in that case).
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	/* NDTPA_IFINDEX is only emitted for per-device parms; the table's
	 * default parms have no dev.
	 */
	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME)) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME)) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY)) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME)))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
1814
/* Fill one RTM_NEWNEIGHTBL message describing a whole neighbour table:
 * name, gc thresholds, NDTA_CONFIG snapshot, summed per-CPU NDTA_STATS,
 * and the table's default parms.  tbl->lock is held (read, BH off) for
 * the duration so the values are mutually consistent.
 * Returns 0 or -EMSGSIZE (message cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		/* NDTA_CONFIG: a point-in-time snapshot of table geometry
		 * and activity timestamps (deltas converted to msecs).
		 */
		unsigned long now = jiffies;
		long flush_delta = now - tbl->last_flush;
		long rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed; take the RCU read
		 * side just long enough to sample rnd/mask.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* NDTA_STATS: sum the per-CPU counters into one struct. */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parms must never be device-bound. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1902
/* Fill a smaller RTM_NEWNEIGHTBL message carrying only the table name and
 * one (typically per-device) parms set — used when dumping the non-default
 * parms of a table.  Returns 0 or -EMSGSIZE (message cancelled).
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1935
/* Validation policy for top-level RTM_SETNEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1944
/* Validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1961
/* RTM_SETNEIGHTBL handler: update a neighbour table's tunables and/or one
 * of its parms sets.  The table is selected by NDTA_NAME (optionally
 * narrowed by family); the parms set inside NDTA_PARMS is selected by
 * NDTPA_IFINDEX (0 means the table default).
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Resolve NDTA_NAME (+ optional family) to a table. */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply each present attribute to the selected parms. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet-count knob: convert to
				 * bytes using a typical frame truesize.
				 */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* The table-wide gc knobs may only be changed from init_net. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2120
/* RTM_GETNEIGHTBL dump: for each table emit one full table message
 * followed by one message per non-default parms set belonging to this
 * netns.  Resume state lives in cb->args[0] (table index) and
 * cb->args[1] (parms index within the table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Skip tbl->parms itself — the default parms were already
		 * emitted as part of neightbl_fill_info() above.
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2173
/* Fill one RTM_NEWNEIGH message for a neighbour entry.  State, hardware
 * address and cacheinfo are sampled together under neigh->lock so they
 * form a consistent snapshot.  Returns 0 or -EMSGSIZE (cancelled).
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		/* Copy the hw address to a local buffer so the lock can be
		 * dropped before nla_put() on the failure path below.
		 */
		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* -1 hides one internal reference (presumably the table's own hold)
	 * from userspace — NOTE(review): confirm against neigh_alloc().
	 */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2226
/* Fill one RTM_NEWNEIGH message for a proxy entry.  Proxy entries carry
 * no link-layer address or cacheinfo; they are reported with NTF_PROXY
 * set and state NUD_NONE.  Returns 0 or -EMSGSIZE (cancelled).
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	/* Device-less (device-agnostic) proxy entries report ifindex 0. */
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2257
/* Broadcast a neighbour change: first to in-kernel netevent listeners,
 * then to userspace via an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2263
2264 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2265 {
2266         struct net_device *master;
2267
2268         if (!master_idx)
2269                 return false;
2270
2271         master = netdev_master_upper_dev_get(dev);
2272         if (!master || master->ifindex != master_idx)
2273                 return true;
2274
2275         return false;
2276 }
2277
2278 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2279 {
2280         if (filter_idx && dev->ifindex != filter_idx)
2281                 return true;
2282
2283         return false;
2284 }
2285
/* Dump the regular (non-proxy) entries of one neighbour table into skb,
 * honouring optional NDA_IFINDEX / NDA_MASTER filters from the request.
 * Traversal is RCU-protected; resume state is kept in cb->args[1]
 * (hash bucket) and cb->args[2] (index within bucket).
 * Returns skb->len on completion or -1 when skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tb[NDA_MAX + 1];
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	int filter_master_idx = 0, filter_idx = 0;
	unsigned int flags = NLM_F_MULTI;
	int err;

	/* Parse failures are not fatal: the dump simply proceeds
	 * unfiltered in that case.
	 */
	err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
	if (!err) {
		if (tb[NDA_IFINDEX]) {
			if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
				return -EINVAL;
			filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
		}
		if (tb[NDA_MASTER]) {
			if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
				return -EINVAL;
			filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
		}
		if (filter_idx || filter_master_idx)
			flags |= NLM_F_DUMP_FILTERED;
	}

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* Only the first (resumed) bucket honours the saved index. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (neigh_ifindex_filtered(n->dev, filter_idx))
				continue;
			if (neigh_master_filtered(n->dev, filter_master_idx))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2351
/* Dump the proxy entries of one neighbour table into skb.  The proxy
 * hash is walked under tbl->lock (read, BH off); resume state is kept in
 * cb->args[3] (bucket) and cb->args[4] (index within bucket).
 * Returns skb->len on completion or -1 when skb filled up.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* Only the first (resumed) bucket honours the saved index. */
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (pneigh_net(n) != net)
				continue;
			if (idx < s_idx)
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;

}
2391
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables matching
 * the requested family and dump either regular or proxy entries
 * (depending on NTF_PROXY in the request).  cb->args[0] holds the table
 * resume index; args[1..] belong to the per-table dump helpers.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Moving on to a new table: reset the per-table resume
		 * state (args[1..]) while keeping args[0].
		 */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}
2431
/* neigh_for_each - invoke @cb(entry, @cookie) for every neighbour in @tbl.
 *
 * Iteration runs under rcu_read_lock_bh() plus the table read lock; the
 * read lock only prevents a concurrent hash-table resize (see comment
 * below), entries themselves are traversed via the RCU protocol.
 * @cb must not sleep and must not add or remove entries.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		/* Walk one hash bucket; ->next is RCU-published. */
		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2453
/* The tbl->lock must be held as a writer and BH disabled. */
/* __neigh_for_each_release - conditionally unlink and release entries.
 *
 * Calls @cb on every entry under the entry's write lock; when @cb returns
 * non-zero the entry is unlinked from its bucket, marked dead, and then
 * released (refcount dropped) outside the entry lock.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np trails the link slot so unlinking is a single
		 * rcu_assign_pointer() visible to concurrent readers.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Splice n out of the bucket list. */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				/* Drop the table's reference after unlock:
				 * may notify and free via RCU.
				 */
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2488
2489 int neigh_xmit(int index, struct net_device *dev,
2490                const void *addr, struct sk_buff *skb)
2491 {
2492         int err = -EAFNOSUPPORT;
2493         if (likely(index < NEIGH_NR_TABLES)) {
2494                 struct neigh_table *tbl;
2495                 struct neighbour *neigh;
2496
2497                 tbl = neigh_tables[index];
2498                 if (!tbl)
2499                         goto out;
2500                 rcu_read_lock_bh();
2501                 if (index == NEIGH_ARP_TABLE) {
2502                         u32 key = *((u32 *)addr);
2503
2504                         neigh = __ipv4_neigh_lookup_noref(dev, key);
2505                 } else {
2506                         neigh = __neigh_lookup_noref(tbl, addr, dev);
2507                 }
2508                 if (!neigh)
2509                         neigh = __neigh_create(tbl, addr, dev, false);
2510                 err = PTR_ERR(neigh);
2511                 if (IS_ERR(neigh)) {
2512                         rcu_read_unlock_bh();
2513                         goto out_kfree_skb;
2514                 }
2515                 err = neigh->output(neigh, skb);
2516                 rcu_read_unlock_bh();
2517         }
2518         else if (index == NEIGH_LINK_TABLE) {
2519                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2520                                       addr, NULL, skb->len);
2521                 if (err < 0)
2522                         goto out_kfree_skb;
2523                 err = dev_queue_xmit(skb);
2524         }
2525 out:
2526         return err;
2527 out_kfree_skb:
2528         kfree_skb(skb);
2529         goto out;
2530 }
2531 EXPORT_SYMBOL(neigh_xmit);
2532
2533 #ifdef CONFIG_PROC_FS
2534
/* Find the first neighbour entry visible to this /proc iteration:
 * first bucket whose chain has an entry in the right netns that passes
 * the optional sub-iterator and NOARP filters. Caller holds
 * rcu_read_lock_bh() and tbl->lock (taken in neigh_seq_start()).
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* We are (re)starting on the main table, not the proxy list. */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol hook may reject the entry. */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* Skip pure-NOARP entries when asked to. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	/* Remember where we stopped for neigh_get_next(). */
	state->bucket = bucket;

	return n;
}
2573
/* Advance the /proc iteration from entry @n to the next visible entry,
 * crossing bucket boundaries as needed. When @pos is non-NULL it is
 * decremented once per entry returned (used by neigh_get_idx() to seek).
 * Same filtering rules and locking context as neigh_get_first().
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* The protocol may still have sub-positions within @n. */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Chain exhausted: move on to the next hash bucket. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2621
2622 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2623 {
2624         struct neighbour *n = neigh_get_first(seq);
2625
2626         if (n) {
2627                 --(*pos);
2628                 while (*pos) {
2629                         n = neigh_get_next(seq, n, pos);
2630                         if (!n)
2631                                 break;
2632                 }
2633         }
2634         return *pos ? NULL : n;
2635 }
2636
2637 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2638 {
2639         struct neigh_seq_state *state = seq->private;
2640         struct net *net = seq_file_net(seq);
2641         struct neigh_table *tbl = state->tbl;
2642         struct pneigh_entry *pn = NULL;
2643         int bucket = state->bucket;
2644
2645         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2646         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2647                 pn = tbl->phash_buckets[bucket];
2648                 while (pn && !net_eq(pneigh_net(pn), net))
2649                         pn = pn->next;
2650                 if (pn)
2651                         break;
2652         }
2653         state->bucket = bucket;
2654
2655         return pn;
2656 }
2657
2658 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2659                                             struct pneigh_entry *pn,
2660                                             loff_t *pos)
2661 {
2662         struct neigh_seq_state *state = seq->private;
2663         struct net *net = seq_file_net(seq);
2664         struct neigh_table *tbl = state->tbl;
2665
2666         do {
2667                 pn = pn->next;
2668         } while (pn && !net_eq(pneigh_net(pn), net));
2669
2670         while (!pn) {
2671                 if (++state->bucket > PNEIGH_HASHMASK)
2672                         break;
2673                 pn = tbl->phash_buckets[state->bucket];
2674                 while (pn && !net_eq(pneigh_net(pn), net))
2675                         pn = pn->next;
2676                 if (pn)
2677                         break;
2678         }
2679
2680         if (pn && pos)
2681                 --(*pos);
2682
2683         return pn;
2684 }
2685
2686 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2687 {
2688         struct pneigh_entry *pn = pneigh_get_first(seq);
2689
2690         if (pn) {
2691                 --(*pos);
2692                 while (*pos) {
2693                         pn = pneigh_get_next(seq, pn, pos);
2694                         if (!pn)
2695                                 break;
2696                 }
2697         }
2698         return *pos ? NULL : pn;
2699 }
2700
2701 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2702 {
2703         struct neigh_seq_state *state = seq->private;
2704         void *rc;
2705         loff_t idxpos = *pos;
2706
2707         rc = neigh_get_idx(seq, &idxpos);
2708         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2709                 rc = pneigh_get_idx(seq, &idxpos);
2710
2711         return rc;
2712 }
2713
/* seq_file ->start for protocol /proc neighbour dumps (arp, ndisc).
 * Initializes per-dump state, then takes rcu_read_lock_bh() and the
 * table read lock — both held until neigh_seq_stop(). Returns
 * SEQ_START_TOKEN for the header line, or the entry at *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iteration-internal; callers may not preset it. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2731
2732 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2733 {
2734         struct neigh_seq_state *state;
2735         void *rc;
2736
2737         if (v == SEQ_START_TOKEN) {
2738                 rc = neigh_get_first(seq);
2739                 goto out;
2740         }
2741
2742         state = seq->private;
2743         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2744                 rc = neigh_get_next(seq, v, NULL);
2745                 if (rc)
2746                         goto out;
2747                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2748                         rc = pneigh_get_first(seq);
2749         } else {
2750                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2751                 rc = pneigh_get_next(seq, v, NULL);
2752         }
2753 out:
2754         ++(*pos);
2755         return rc;
2756 }
2757 EXPORT_SYMBOL(neigh_seq_next);
2758
2759 void neigh_seq_stop(struct seq_file *seq, void *v)
2760         __releases(tbl->lock)
2761         __releases(rcu_bh)
2762 {
2763         struct neigh_seq_state *state = seq->private;
2764         struct neigh_table *tbl = state->tbl;
2765
2766         read_unlock(&tbl->lock);
2767         rcu_read_unlock_bh();
2768 }
2769 EXPORT_SYMBOL(neigh_seq_stop);
2770
2771 /* statistics via seq_file */
2772
2773 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2774 {
2775         struct neigh_table *tbl = seq->private;
2776         int cpu;
2777
2778         if (*pos == 0)
2779                 return SEQ_START_TOKEN;
2780
2781         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2782                 if (!cpu_possible(cpu))
2783                         continue;
2784                 *pos = cpu+1;
2785                 return per_cpu_ptr(tbl->stats, cpu);
2786         }
2787         return NULL;
2788 }
2789
2790 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2791 {
2792         struct neigh_table *tbl = seq->private;
2793         int cpu;
2794
2795         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2796                 if (!cpu_possible(cpu))
2797                         continue;
2798                 *pos = cpu+1;
2799                 return per_cpu_ptr(tbl->stats, cpu);
2800         }
2801         (*pos)++;
2802         return NULL;
2803 }
2804
/* seq_file ->stop: nothing to release — ->start takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2809
/* Emit one line of /proc/net/stat/<table>: the column header for the
 * SEQ_START_TOKEN record, otherwise the per-CPU counters @v points at
 * (supplied by neigh_stat_seq_start/next). Counters are hex-formatted
 * per long-standing procfs ABI — do not change the format.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}
2844
/* seq_file operations backing /proc/net/stat/<table>. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2851
2852 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2853 {
2854         int ret = seq_open(file, &neigh_stat_seq_ops);
2855
2856         if (!ret) {
2857                 struct seq_file *sf = file->private_data;
2858                 sf->private = PDE_DATA(inode);
2859         }
2860         return ret;
2861 };
2862
/* file_operations for the per-table statistics file in procfs. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2870
2871 #endif /* CONFIG_PROC_FS */
2872
2873 static inline size_t neigh_nlmsg_size(void)
2874 {
2875         return NLMSG_ALIGN(sizeof(struct ndmsg))
2876                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2877                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2878                + nla_total_size(sizeof(struct nda_cacheinfo))
2879                + nla_total_size(4); /* NDA_PROBES */
2880 }
2881
2882 static void __neigh_notify(struct neighbour *n, int type, int flags)
2883 {
2884         struct net *net = dev_net(n->dev);
2885         struct sk_buff *skb;
2886         int err = -ENOBUFS;
2887
2888         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2889         if (skb == NULL)
2890                 goto errout;
2891
2892         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2893         if (err < 0) {
2894                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2895                 WARN_ON(err == -EMSGSIZE);
2896                 kfree_skb(skb);
2897                 goto errout;
2898         }
2899         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2900         return;
2901 errout:
2902         if (err < 0)
2903                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2904 }
2905
/* Ask userspace (e.g. an arpd-style daemon) to resolve @n: emits an
 * RTM_GETNEIGH request to the RTNLGRP_NEIGH multicast group.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2911
2912 #ifdef CONFIG_SYSCTL
/* Shared clamp bounds for the sysctl handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen (in packets) so the byte equivalent fits in an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2916
/* sysctl handler for the legacy packet-count "unres_qlen" knob, which
 * shadows the byte-based QUEUE_LEN_BYTES value: reads convert bytes to
 * packets, writes convert back.
 * NOTE(review): the divide truncates, so a qlen_bytes value that is not
 * a multiple of SKB_TRUESIZE(ETH_FRAME_LEN) reads back rounded down —
 * presumably accepted legacy behaviour; confirm before changing.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	/* Expose a shadow entry clamped to [0, unres_qlen_max]. */
	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2934
2935 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2936                                                    int family)
2937 {
2938         switch (family) {
2939         case AF_INET:
2940                 return __in_dev_arp_parms_get_rcu(dev);
2941         case AF_INET6:
2942                 return __in6_dev_nd_parms_get_rcu(dev);
2943         }
2944         return NULL;
2945 }
2946
2947 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2948                                   int index)
2949 {
2950         struct net_device *dev;
2951         int family = neigh_parms_family(p);
2952
2953         rcu_read_lock();
2954         for_each_netdev_rcu(net, dev) {
2955                 struct neigh_parms *dst_p =
2956                                 neigh_get_dev_parms_rcu(dev, family);
2957
2958                 if (dst_p && !test_bit(index, dst_p->data_state))
2959                         dst_p->data[index] = p->data[index];
2960         }
2961         rcu_read_unlock();
2962 }
2963
2964 static void neigh_proc_update(struct ctl_table *ctl, int write)
2965 {
2966         struct net_device *dev = ctl->extra1;
2967         struct neigh_parms *p = ctl->extra2;
2968         struct net *net = neigh_parms_net(p);
2969         int index = (int *) ctl->data - p->data;
2970
2971         if (!write)
2972                 return;
2973
2974         set_bit(index, p->data_state);
2975         if (!dev) /* NULL dev means this is default value */
2976                 neigh_copy_dflt_parms(net, p, index);
2977 }
2978
2979 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2980                                            void __user *buffer,
2981                                            size_t *lenp, loff_t *ppos)
2982 {
2983         struct ctl_table tmp = *ctl;
2984         int ret;
2985
2986         tmp.extra1 = &zero;
2987         tmp.extra2 = &int_max;
2988
2989         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2990         neigh_proc_update(ctl, write);
2991         return ret;
2992 }
2993
2994 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2995                         void __user *buffer, size_t *lenp, loff_t *ppos)
2996 {
2997         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2998
2999         neigh_proc_update(ctl, write);
3000         return ret;
3001 }
3002 EXPORT_SYMBOL(neigh_proc_dointvec);
3003
3004 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3005                                 void __user *buffer,
3006                                 size_t *lenp, loff_t *ppos)
3007 {
3008         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3009
3010         neigh_proc_update(ctl, write);
3011         return ret;
3012 }
3013 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3014
3015 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3016                                               void __user *buffer,
3017                                               size_t *lenp, loff_t *ppos)
3018 {
3019         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3020
3021         neigh_proc_update(ctl, write);
3022         return ret;
3023 }
3024
3025 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3026                                    void __user *buffer,
3027                                    size_t *lenp, loff_t *ppos)
3028 {
3029         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3030
3031         neigh_proc_update(ctl, write);
3032         return ret;
3033 }
3034 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3035
3036 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3037                                           void __user *buffer,
3038                                           size_t *lenp, loff_t *ppos)
3039 {
3040         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3041
3042         neigh_proc_update(ctl, write);
3043         return ret;
3044 }
3045
3046 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3047                                           void __user *buffer,
3048                                           size_t *lenp, loff_t *ppos)
3049 {
3050         struct neigh_parms *p = ctl->extra2;
3051         int ret;
3052
3053         if (strcmp(ctl->procname, "base_reachable_time") == 0)
3054                 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3055         else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3056                 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3057         else
3058                 ret = -1;
3059
3060         if (write && ret == 0) {
3061                 /* update reachable_time as well, otherwise, the change will
3062                  * only be effective after the next time neigh_periodic_work
3063                  * decides to recompute it
3064                  */
3065                 p->reachable_time =
3066                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3067         }
3068         return ret;
3069 }
3070
/* Offset of parameter @index inside struct neigh_parms::data, encoded
 * as a pointer relative to address 0; neigh_sysctl_register() later
 * adds the real neigh_parms address to make it a valid ->data pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot: sysctl @name at NEIGH_VAR_@attr, backed by
 * the NEIGH_VAR_@data_attr parameter (usually the same; the *_REUSED
 * variants below alias a second name onto an existing parameter).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Convenience wrappers selecting the matching proc handler. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3100
/* Template sysctl table cloned (kmemdup) per device / per "default" by
 * neigh_sysctl_register(), which patches ->data, ->extra1/2 and the
 * proc handlers on the copy. Entries up to NEIGH_VAR_LOCKTIME are
 * per-neigh_parms; the trailing GC entries exist only in the "default"
 * table (they are zeroed out for per-device registrations).
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		/* Legacy aliases exposing existing parameters in other units. */
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* Table-wide GC knobs: ->data is patched to point into the
		 * neigh_table for the "default" registration.
		 */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3155
/* neigh_sysctl_register - expose @p under net/<ipv4|ipv6>/neigh/<dev|default>.
 *
 * Clones neigh_sysctl_template and rebinds it to @p: per-parms entries
 * get their ->data rebased onto @p and ->extra1/2 set for
 * neigh_proc_update(); the GC entries are either dropped (per-device)
 * or pointed at the owning neigh_table (default). @handler, when given,
 * overrides the (re)transmit/reachable-time handlers (used by protocols
 * with extra update work, e.g. ndisc).
 *
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		/* Template ->data holds offsets relative to NULL; adding
		 * @p turns them into real pointers into p->data[].
		 */
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3244
3245 void neigh_sysctl_unregister(struct neigh_parms *p)
3246 {
3247         if (p->sysctl_table) {
3248                 struct neigh_sysctl_table *t = p->sysctl_table;
3249                 p->sysctl_table = NULL;
3250                 unregister_net_sysctl_table(t->sysctl_header);
3251                 kfree(t);
3252         }
3253 }
3254 EXPORT_SYMBOL(neigh_sysctl_unregister);
3255
3256 #endif  /* CONFIG_SYSCTL */
3257
/* Register the rtnetlink message handlers for neighbour entries
 * (add/delete/dump) and neighbour tables (dump/set). Runs at
 * subsys_initcall time so protocols registering tables later find the
 * netlink plumbing in place.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3272