GNU Linux-libre 4.9.318-gnu1
[releases.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/* Address flags that are cleared from every IPv4 address on insertion
 * (see the "Don't set IPv6 only flags" step in __inet_insert_ifa()).
 */
70 #define IPV6ONLY_FLAGS  \
71                 (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
72                  IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
73                  IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
74
/* Built-in IPv4 device configuration template.  The data[] array is
 * indexed by (IPV4_DEVCONF_* - 1); every entry not listed here is zero.
 * NOTE(review): where this object gets installed (presumably as the "all"
 * configuration of the initial namespace) is not visible here -- confirm.
 */
75 static struct ipv4_devconf ipv4_devconf = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
82                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
83         },
84 };
85
/* Built-in "default" IPv4 device configuration template.  Identical to
 * ipv4_devconf except that ACCEPT_SOURCE_ROUTE is additionally enabled.
 * NOTE(review): presumably the initial value behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device -- confirm.
 */
86 static struct ipv4_devconf ipv4_devconf_dflt = {
87         .data = {
88                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
89                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
90                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
91                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
92                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
93                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
94                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
95         },
96 };
97
/*
 * Read attribute @attr from the default devconf that @net points to
 * (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that any pointer-valued expression, not just a
 * plain identifier, can be passed without the "->" binding to only part
 * of the argument.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
100
/* Netlink attribute policy for IPv4 address messages.  Passed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().  The label is
 * limited to IFNAMSIZ - 1 characters.
 */
101 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
102         [IFA_LOCAL]             = { .type = NLA_U32 },
103         [IFA_ADDRESS]           = { .type = NLA_U32 },
104         [IFA_BROADCAST]         = { .type = NLA_U32 },
105         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
106         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
107         [IFA_FLAGS]             = { .type = NLA_U32 },
108 };
109
/* Hash table of configured IPv4 addresses: 1 << 8 = 256 buckets, indexed
 * by inet_addr_hash() and chained through in_ifaddr.hash.
 */
110 #define IN4_ADDR_HSIZE_SHIFT    8
111 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
112
113 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
114
115 static u32 inet_addr_hash(const struct net *net, __be32 addr)
116 {
117         u32 val = (__force u32) addr ^ net_hash_mix(net);
118
119         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
120 }
121
122 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
123 {
124         u32 hash = inet_addr_hash(net, ifa->ifa_local);
125
126         ASSERT_RTNL();
127         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
128 }
129
/* Unlink @ifa from the global address hash.  Asserts that RTNL is held. */
130 static void inet_hash_remove(struct in_ifaddr *ifa)
131 {
132         ASSERT_RTNL();
133         hlist_del_init_rcu(&ifa->hash);
134 }
135
136 /**
137  * __ip_dev_find - find the first device with a given source address.
138  * @net: the net namespace
139  * @addr: the source address
140  * @devref: if true, take a reference on the found device
141  *
142  * If a caller uses devref=false, it should be protected by RCU, or RTNL
143  */
144 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
145 {
146         u32 hash = inet_addr_hash(net, addr);
147         struct net_device *result = NULL;
148         struct in_ifaddr *ifa;
149
150         rcu_read_lock();
151         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
152                 if (ifa->ifa_local == addr) {
153                         struct net_device *dev = ifa->ifa_dev->dev;
154
155                         if (!net_eq(dev_net(dev), net))
156                                 continue;
157                         result = dev;
158                         break;
159                 }
160         }
161         if (!result) {
162                 struct flowi4 fl4 = { .daddr = addr };
163                 struct fib_result res = { 0 };
164                 struct fib_table *local;
165
166                 /* Fallback to FIB local table so that communication
167                  * over loopback subnets work.
168                  */
169                 local = fib_get_table(net, RT_TABLE_LOCAL);
170                 if (local &&
171                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
172                     res.type == RTN_LOCAL)
173                         result = FIB_RES_DEV(res);
174         }
175         if (result && devref)
176                 dev_hold(result);
177         rcu_read_unlock();
178         return result;
179 }
180 EXPORT_SYMBOL(__ip_dev_find);
181
/* Forward declaration: sends an RTM_NEWADDR/RTM_DELADDR style netlink
 * message for an address (called with those events throughout this file).
 */
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183
/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is inserted or deleted.
 */
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
186                          int destroy);
/* Per-device sysctl hooks.  Without CONFIG_SYSCTL they are stubs:
 * registration always reports success (0) and unregistration does nothing.
 */
187 #ifdef CONFIG_SYSCTL
188 static int devinet_sysctl_register(struct in_device *idev);
189 static void devinet_sysctl_unregister(struct in_device *idev);
190 #else
191 static int devinet_sysctl_register(struct in_device *idev)
192 {
193         return 0;
194 }
195 static void devinet_sysctl_unregister(struct in_device *idev)
196 {
197 }
198 #endif
199
200 /* Helpers to allocate an in_ifaddr and to free it after an RCU grace period. */
201
/* Allocate a zero-filled in_ifaddr with GFP_KERNEL.
 * Returns NULL if the allocation fails.
 */
202 static struct in_ifaddr *inet_alloc_ifa(void)
203 {
204         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
205 }
206
207 static void inet_rcu_free_ifa(struct rcu_head *head)
208 {
209         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
210         if (ifa->ifa_dev)
211                 in_dev_put(ifa->ifa_dev);
212         kfree(ifa);
213 }
214
/* Schedule @ifa to be freed by inet_rcu_free_ifa() through call_rcu(). */
215 static void inet_free_ifa(struct in_ifaddr *ifa)
216 {
217         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
218 }
219
220 void in_dev_finish_destroy(struct in_device *idev)
221 {
222         struct net_device *dev = idev->dev;
223
224         WARN_ON(idev->ifa_list);
225         WARN_ON(idev->mc_list);
226         kfree(rcu_dereference_protected(idev->mc_hash, 1));
227 #ifdef NET_REFCNT_DEBUG
228         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
229 #endif
230         dev_put(dev);
231         if (!idev->dead)
232                 pr_err("Freeing alive in_device %p\n", idev);
233         else
234                 kfree(idev);
235 }
236 EXPORT_SYMBOL(in_dev_finish_destroy);
237
238 static struct in_device *inetdev_init(struct net_device *dev)
239 {
240         struct in_device *in_dev;
241         int err = -ENOMEM;
242
243         ASSERT_RTNL();
244
245         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
246         if (!in_dev)
247                 goto out;
248         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
249                         sizeof(in_dev->cnf));
250         in_dev->cnf.sysctl = NULL;
251         in_dev->dev = dev;
252         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
253         if (!in_dev->arp_parms)
254                 goto out_kfree;
255         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
256                 dev_disable_lro(dev);
257         /* Reference in_dev->dev */
258         dev_hold(dev);
259         /* Account for reference dev->ip_ptr (below) */
260         in_dev_hold(in_dev);
261
262         err = devinet_sysctl_register(in_dev);
263         if (err) {
264                 in_dev->dead = 1;
265                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
/* Remove one address from a device, handling its secondaries.
 *
 * @in_dev:  device the address belongs to
 * @ifap:    the link (list head or a predecessor's ifa_next) that currently
 *           points at the address to delete
 * @destroy: if non-zero the deleted address is also freed via
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh, @portid: passed through to rtmsg_ifa() for the netlink messages
 *
 * Asserts that RTNL is held.  When a primary address is deleted, matching
 * secondaries in the same subnet are either deleted too or, if secondary
 * promotion is enabled for the device, the first one is promoted to
 * primary.  Both steps are skipped when the device is already dead.
 */
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
            /* Dead device: no secondary handling, just unlink and announce. */
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
                            /* Track the last primary (with scope >= ifa1's)
                             * seen so far; a promoted address is re-inserted
                             * after it.
                             */
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
                            /* Not a secondary of ifa1's subnet: step over it,
                             * remembering it as a possible predecessor of the
                             * promotion candidate.
                             */
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
366                         if (!do_promote) {
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
                                    /* First matching secondary becomes the new primary. */
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
                    /* Move the promoted address so it directly follows
                     * last_prim.  With no predecessor recorded it is left
                     * where it is.
                     */
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
                    /* Re-add routes for the remaining addresses of the subnet. */
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
/* Delete an address without an originating netlink request:
 * __inet_del_ifa() with a NULL message header and port id 0.
 */
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/* Delayed work item whose handler is check_lifetime().  It is queued
 * with zero delay whenever an address is inserted (see
 * __inet_insert_ifa()) and re-queued by the handler itself.
 */
442 static void check_lifetime(struct work_struct *work);
443
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
/* Insert a fully set up address into its device's address list.
 *
 * @ifa:    address to insert; ifa->ifa_dev must already be set.  On every
 *          path that does not insert it, @ifa is released with
 *          inet_free_ifa().
 * @nlh, @portid: passed through to rtmsg_ifa() for the RTM_NEWADDR message
 *
 * Asserts that RTNL is held.
 *
 * Returns 0 on success (and also when ifa_local is zero, in which case the
 * address is silently dropped), -EEXIST if the same local address already
 * exists in a matching subnet, or -EINVAL if a matching subnet exists with
 * a different scope.
 */
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451
452         ASSERT_RTNL();
453
454         if (!ifa->ifa_local) {
455                 inet_free_ifa(ifa);
456                 return 0;
457         }
458
            /* Whether the address is secondary is decided below, not by the caller. */
459         ifa->ifa_flags &= ~IFA_F_SECONDARY;
460         last_primary = &in_dev->ifa_list;
461
462         /* Don't set IPv6 only flags to IPv4 addresses */
463         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
464
            /* Walk the whole list: remember the insertion point for a
             * primary address and detect an existing address in the
             * same subnet.
             */
465         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
466              ifap = &ifa1->ifa_next) {
467                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
468                     ifa->ifa_scope <= ifa1->ifa_scope)
469                         last_primary = &ifa1->ifa_next;
470                 if (ifa1->ifa_mask == ifa->ifa_mask &&
471                     inet_ifa_match(ifa1->ifa_address, ifa)) {
472                         if (ifa1->ifa_local == ifa->ifa_local) {
473                                 inet_free_ifa(ifa);
474                                 return -EEXIST;
475                         }
476                         if (ifa1->ifa_scope != ifa->ifa_scope) {
477                                 inet_free_ifa(ifa);
478                                 return -EINVAL;
479                         }
480                         ifa->ifa_flags |= IFA_F_SECONDARY;
481                 }
482         }
483
            /* A primary goes after the last suitable primary; a secondary
             * is appended at the end of the list, where ifap points after
             * the loop.
             */
484         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
485                 prandom_seed((__force u32) ifa->ifa_local);
486                 ifap = last_primary;
487         }
488
489         ifa->ifa_next = *ifap;
490         *ifap = ifa;
491
492         inet_hash_insert(dev_net(in_dev->dev), ifa);
493
            /* Run the lifetime check immediately so the new address is accounted for. */
494         cancel_delayed_work(&check_lifetime_work);
495         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
496
497         /* Send message first, then call notifier.
498            Notifier will trigger FIB update, so that
499            listeners of netlink will know about new ifaddr */
500         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
501         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
502
503         return 0;
504 }
505
/* Insert an address without an originating netlink request:
 * __inet_insert_ifa() with a NULL message header and port id 0.
 */
506 static int inet_insert_ifa(struct in_ifaddr *ifa)
507 {
508         return __inet_insert_ifa(ifa, NULL, 0);
509 }
510
511 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
512 {
513         struct in_device *in_dev = __in_dev_get_rtnl(dev);
514
515         ASSERT_RTNL();
516
517         if (!in_dev) {
518                 inet_free_ifa(ifa);
519                 return -ENOBUFS;
520         }
521         ipv4_devconf_setall(in_dev);
522         neigh_parms_data_state_setall(in_dev->arp_parms);
523         if (ifa->ifa_dev != in_dev) {
524                 WARN_ON(ifa->ifa_dev);
525                 in_dev_hold(in_dev);
526                 ifa->ifa_dev = in_dev;
527         }
528         if (ipv4_is_loopback(ifa->ifa_local))
529                 ifa->ifa_scope = RT_SCOPE_HOST;
530         return inet_insert_ifa(ifa);
531 }
532
533 /* Caller must hold RCU or RTNL :
534  * We dont take a reference on found in_device
535  */
536 struct in_device *inetdev_by_index(struct net *net, int ifindex)
537 {
538         struct net_device *dev;
539         struct in_device *in_dev = NULL;
540
541         rcu_read_lock();
542         dev = dev_get_by_index_rcu(net, ifindex);
543         if (dev)
544                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
545         rcu_read_unlock();
546         return in_dev;
547 }
548 EXPORT_SYMBOL(inetdev_by_index);
549
550 /* Called only from RTNL semaphored context. No locks. */
551
/* Return the first primary address of @in_dev whose netmask equals @mask
 * and whose subnet contains @prefix, or NULL if there is none.
 */
552 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
553                                     __be32 mask)
554 {
555         ASSERT_RTNL();
556
557         for_primary_ifa(in_dev) {
558                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
559                         return ifa;
560         } endfor_ifa(in_dev);
561         return NULL;
562 }
563
564 static int ip_mc_autojoin_config(struct net *net, bool join,
565                                  const struct in_ifaddr *ifa)
566 {
567 #if defined(CONFIG_IP_MULTICAST)
568         struct ip_mreqn mreq = {
569                 .imr_multiaddr.s_addr = ifa->ifa_address,
570                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
571         };
572         struct sock *sk = net->ipv4.mc_autojoin_sk;
573         int ret;
574
575         ASSERT_RTNL();
576
577         lock_sock(sk);
578         if (join)
579                 ret = ip_mc_join_group(sk, &mreq);
580         else
581                 ret = ip_mc_leave_group(sk, &mreq);
582         release_sock(sk);
583
584         return ret;
585 #else
586         return -EOPNOTSUPP;
587 #endif
588 }
589
590 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
591 {
592         struct net *net = sock_net(skb->sk);
593         struct nlattr *tb[IFA_MAX+1];
594         struct in_device *in_dev;
595         struct ifaddrmsg *ifm;
596         struct in_ifaddr *ifa, **ifap;
597         int err = -EINVAL;
598
599         ASSERT_RTNL();
600
601         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
602         if (err < 0)
603                 goto errout;
604
605         ifm = nlmsg_data(nlh);
606         in_dev = inetdev_by_index(net, ifm->ifa_index);
607         if (!in_dev) {
608                 err = -ENODEV;
609                 goto errout;
610         }
611
612         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
613              ifap = &ifa->ifa_next) {
614                 if (tb[IFA_LOCAL] &&
615                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
616                         continue;
617
618                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
619                         continue;
620
621                 if (tb[IFA_ADDRESS] &&
622                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
623                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
624                         continue;
625
626                 if (ipv4_is_multicast(ifa->ifa_address))
627                         ip_mc_autojoin_config(net, false, ifa);
628                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
629                 return 0;
630         }
631
632         err = -EADDRNOTAVAIL;
633 errout:
634         return err;
635 }
636
/* Lifetime value meaning "never expires"; check_lifetime() skips the
 * corresponding expiry test when a lifetime equals it.
 */
637 #define INFINITY_LIFE_TIME      0xFFFFFFFF
638
/* Delayed-work handler that enforces address lifetimes.
 *
 * For every bucket of inet_addr_lst[] it first scans under
 * rcu_read_lock() to find out whether any non-permanent address has
 * expired or needs to be marked deprecated, and to track the earliest
 * upcoming deadline in "next".  Only if a change is needed does it take
 * RTNL and rescan the bucket, deleting addresses whose valid lifetime has
 * elapsed and flagging (and announcing) those whose preferred lifetime
 * has elapsed.  Finally the work re-queues itself.
 */
639 static void check_lifetime(struct work_struct *work)
640 {
641         unsigned long now, next, next_sec, next_sched;
642         struct in_ifaddr *ifa;
643         struct hlist_node *n;
644         int i;
645
646         now = jiffies;
647         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
648
649         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
650                 bool change_needed = false;
651
                    /* Pass 1: read-only scan under RCU. */
652                 rcu_read_lock();
653                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
654                         unsigned long age;
655
656                         if (ifa->ifa_flags & IFA_F_PERMANENT)
657                                 continue;
658
659                         /* We try to batch several events at once. */
                            /* age is in seconds since ifa_tstamp, biased by the fuzz. */
660                         age = (now - ifa->ifa_tstamp +
661                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
662
663                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
664                             age >= ifa->ifa_valid_lft) {
665                                 change_needed = true;
666                         } else if (ifa->ifa_preferred_lft ==
667                                    INFINITY_LIFE_TIME) {
668                                 continue;
669                         } else if (age >= ifa->ifa_preferred_lft) {
                                    /* Preferred lifetime over: next deadline is
                                     * the end of the valid lifetime.
                                     */
670                                 if (time_before(ifa->ifa_tstamp +
671                                                 ifa->ifa_valid_lft * HZ, next))
672                                         next = ifa->ifa_tstamp +
673                                                ifa->ifa_valid_lft * HZ;
674
675                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
676                                         change_needed = true;
677                         } else if (time_before(ifa->ifa_tstamp +
678                                                ifa->ifa_preferred_lft * HZ,
679                                                next)) {
680                                 next = ifa->ifa_tstamp +
681                                        ifa->ifa_preferred_lft * HZ;
682                         }
683                 }
684                 rcu_read_unlock();
685                 if (!change_needed)
686                         continue;
                    /* Pass 2: apply the changes with RTNL held. */
687                 rtnl_lock();
688                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
689                         unsigned long age;
690
691                         if (ifa->ifa_flags & IFA_F_PERMANENT)
692                                 continue;
693
694                         /* We try to batch several events at once. */
695                         age = (now - ifa->ifa_tstamp +
696                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
697
698                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
699                             age >= ifa->ifa_valid_lft) {
700                                 struct in_ifaddr **ifap;
701
                                    /* inet_del_ifa() needs the link that points
                                     * at ifa, so find it in the device list.
                                     */
702                                 for (ifap = &ifa->ifa_dev->ifa_list;
703                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
704                                         if (*ifap == ifa) {
705                                                 inet_del_ifa(ifa->ifa_dev,
706                                                              ifap, 1);
707                                                 break;
708                                         }
709                                 }
710                         } else if (ifa->ifa_preferred_lft !=
711                                    INFINITY_LIFE_TIME &&
712                                    age >= ifa->ifa_preferred_lft &&
713                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
714                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
715                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
716                         }
717                 }
718                 rtnl_unlock();
719         }
720
721         next_sec = round_jiffies_up(next);
722         next_sched = next;
723
724         /* If rounded timeout is accurate enough, accept it. */
725         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
726                 next_sched = next_sec;
727
728         now = jiffies;
729         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
730         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
731                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
732
733         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
734                         next_sched - now);
735 }
736
737 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
738                              __u32 prefered_lft)
739 {
740         unsigned long timeout;
741
742         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
743
744         timeout = addrconf_timeout_fixup(valid_lft, HZ);
745         if (addrconf_finite_timeout(timeout))
746                 ifa->ifa_valid_lft = timeout;
747         else
748                 ifa->ifa_flags |= IFA_F_PERMANENT;
749
750         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
751         if (addrconf_finite_timeout(timeout)) {
752                 if (timeout == 0)
753                         ifa->ifa_flags |= IFA_F_DEPRECATED;
754                 ifa->ifa_preferred_lft = timeout;
755         }
756         ifa->ifa_tstamp = jiffies;
757         if (!ifa->ifa_cstamp)
758                 ifa->ifa_cstamp = ifa->ifa_tstamp;
759 }
760
761 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
762                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
763 {
764         struct nlattr *tb[IFA_MAX+1];
765         struct in_ifaddr *ifa;
766         struct ifaddrmsg *ifm;
767         struct net_device *dev;
768         struct in_device *in_dev;
769         int err;
770
771         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
772         if (err < 0)
773                 goto errout;
774
775         ifm = nlmsg_data(nlh);
776         err = -EINVAL;
777         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
778                 goto errout;
779
780         dev = __dev_get_by_index(net, ifm->ifa_index);
781         err = -ENODEV;
782         if (!dev)
783                 goto errout;
784
785         in_dev = __in_dev_get_rtnl(dev);
786         err = -ENOBUFS;
787         if (!in_dev)
788                 goto errout;
789
790         ifa = inet_alloc_ifa();
791         if (!ifa)
792                 /*
793                  * A potential indev allocation can be left alive, it stays
794                  * assigned to its device and is destroy with it.
795                  */
796                 goto errout;
797
798         ipv4_devconf_setall(in_dev);
799         neigh_parms_data_state_setall(in_dev->arp_parms);
800         in_dev_hold(in_dev);
801
802         if (!tb[IFA_ADDRESS])
803                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
804
805         INIT_HLIST_NODE(&ifa->hash);
806         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
807         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
808         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
809                                          ifm->ifa_flags;
810         ifa->ifa_scope = ifm->ifa_scope;
811         ifa->ifa_dev = in_dev;
812
813         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
814         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
815
816         if (tb[IFA_BROADCAST])
817                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
818
819         if (tb[IFA_LABEL])
820                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
821         else
822                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
823
824         if (tb[IFA_CACHEINFO]) {
825                 struct ifa_cacheinfo *ci;
826
827                 ci = nla_data(tb[IFA_CACHEINFO]);
828                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
829                         err = -EINVAL;
830                         goto errout_free;
831                 }
832                 *pvalid_lft = ci->ifa_valid;
833                 *pprefered_lft = ci->ifa_prefered;
834         }
835
836         return ifa;
837
838 errout_free:
839         inet_free_ifa(ifa);
840 errout:
841         return ERR_PTR(err);
842 }
843
844 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
845 {
846         struct in_device *in_dev = ifa->ifa_dev;
847         struct in_ifaddr *ifa1, **ifap;
848
849         if (!ifa->ifa_local)
850                 return NULL;
851
852         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
853              ifap = &ifa1->ifa_next) {
854                 if (ifa1->ifa_mask == ifa->ifa_mask &&
855                     inet_ifa_match(ifa1->ifa_address, ifa) &&
856                     ifa1->ifa_local == ifa->ifa_local)
857                         return ifa1;
858         }
859         return NULL;
860 }
861
862 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
863 {
864         struct net *net = sock_net(skb->sk);
865         struct in_ifaddr *ifa;
866         struct in_ifaddr *ifa_existing;
867         __u32 valid_lft = INFINITY_LIFE_TIME;
868         __u32 prefered_lft = INFINITY_LIFE_TIME;
869
870         ASSERT_RTNL();
871
872         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
873         if (IS_ERR(ifa))
874                 return PTR_ERR(ifa);
875
876         ifa_existing = find_matching_ifa(ifa);
877         if (!ifa_existing) {
878                 /* It would be best to check for !NLM_F_CREATE here but
879                  * userspace already relies on not having to provide this.
880                  */
881                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
882                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
883                         int ret = ip_mc_autojoin_config(net, true, ifa);
884
885                         if (ret < 0) {
886                                 inet_free_ifa(ifa);
887                                 return ret;
888                         }
889                 }
890                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
891         } else {
892                 inet_free_ifa(ifa);
893
894                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
895                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
896                         return -EEXIST;
897                 ifa = ifa_existing;
898                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
899                 cancel_delayed_work(&check_lifetime_work);
900                 queue_delayed_work(system_power_efficient_wq,
901                                 &check_lifetime_work, 0);
902                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
903         }
904         return 0;
905 }
906
907 /*
908  *      Determine a default network mask, based on the IP address.
909  */
910
911 static int inet_abc_len(__be32 addr)
912 {
913         int rc = -1;    /* Something else, probably a multicast. */
914
915         if (ipv4_is_zeronet(addr))
916                 rc = 0;
917         else {
918                 __u32 haddr = ntohl(addr);
919
920                 if (IN_CLASSA(haddr))
921                         rc = 8;
922                 else if (IN_CLASSB(haddr))
923                         rc = 16;
924                 else if (IN_CLASSC(haddr))
925                         rc = 24;
926         }
927
928         return rc;
929 }
930
931
932 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
933 {
934         struct ifreq ifr;
935         struct sockaddr_in sin_orig;
936         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
937         struct in_device *in_dev;
938         struct in_ifaddr **ifap = NULL;
939         struct in_ifaddr *ifa = NULL;
940         struct net_device *dev;
941         char *colon;
942         int ret = -EFAULT;
943         int tryaddrmatch = 0;
944
945         /*
946          *      Fetch the caller's info block into kernel space
947          */
948
949         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
950                 goto out;
951         ifr.ifr_name[IFNAMSIZ - 1] = 0;
952
953         /* save original address for comparison */
954         memcpy(&sin_orig, sin, sizeof(*sin));
955
956         colon = strchr(ifr.ifr_name, ':');
957         if (colon)
958                 *colon = 0;
959
960         dev_load(net, ifr.ifr_name);
961
962         switch (cmd) {
963         case SIOCGIFADDR:       /* Get interface address */
964         case SIOCGIFBRDADDR:    /* Get the broadcast address */
965         case SIOCGIFDSTADDR:    /* Get the destination address */
966         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
967                 /* Note that these ioctls will not sleep,
968                    so that we do not impose a lock.
969                    One day we will be forced to put shlock here (I mean SMP)
970                  */
971                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
972                 memset(sin, 0, sizeof(*sin));
973                 sin->sin_family = AF_INET;
974                 break;
975
976         case SIOCSIFFLAGS:
977                 ret = -EPERM;
978                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
979                         goto out;
980                 break;
981         case SIOCSIFADDR:       /* Set interface address (and family) */
982         case SIOCSIFBRDADDR:    /* Set the broadcast address */
983         case SIOCSIFDSTADDR:    /* Set the destination address */
984         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
985                 ret = -EPERM;
986                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
987                         goto out;
988                 ret = -EINVAL;
989                 if (sin->sin_family != AF_INET)
990                         goto out;
991                 break;
992         default:
993                 ret = -EINVAL;
994                 goto out;
995         }
996
997         rtnl_lock();
998
999         ret = -ENODEV;
1000         dev = __dev_get_by_name(net, ifr.ifr_name);
1001         if (!dev)
1002                 goto done;
1003
1004         if (colon)
1005                 *colon = ':';
1006
1007         in_dev = __in_dev_get_rtnl(dev);
1008         if (in_dev) {
1009                 if (tryaddrmatch) {
1010                         /* Matthias Andree */
1011                         /* compare label and address (4.4BSD style) */
1012                         /* note: we only do this for a limited set of ioctls
1013                            and only if the original address family was AF_INET.
1014                            This is checked above. */
1015                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1016                              ifap = &ifa->ifa_next) {
1017                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1018                                     sin_orig.sin_addr.s_addr ==
1019                                                         ifa->ifa_local) {
1020                                         break; /* found */
1021                                 }
1022                         }
1023                 }
1024                 /* we didn't get a match, maybe the application is
1025                    4.3BSD-style and passed in junk so we fall back to
1026                    comparing just the label */
1027                 if (!ifa) {
1028                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1029                              ifap = &ifa->ifa_next)
1030                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1031                                         break;
1032                 }
1033         }
1034
1035         ret = -EADDRNOTAVAIL;
1036         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1037                 goto done;
1038
1039         switch (cmd) {
1040         case SIOCGIFADDR:       /* Get interface address */
1041                 sin->sin_addr.s_addr = ifa->ifa_local;
1042                 goto rarok;
1043
1044         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1045                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1046                 goto rarok;
1047
1048         case SIOCGIFDSTADDR:    /* Get the destination address */
1049                 sin->sin_addr.s_addr = ifa->ifa_address;
1050                 goto rarok;
1051
1052         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1053                 sin->sin_addr.s_addr = ifa->ifa_mask;
1054                 goto rarok;
1055
1056         case SIOCSIFFLAGS:
1057                 if (colon) {
1058                         ret = -EADDRNOTAVAIL;
1059                         if (!ifa)
1060                                 break;
1061                         ret = 0;
1062                         if (!(ifr.ifr_flags & IFF_UP))
1063                                 inet_del_ifa(in_dev, ifap, 1);
1064                         break;
1065                 }
1066                 ret = dev_change_flags(dev, ifr.ifr_flags);
1067                 break;
1068
1069         case SIOCSIFADDR:       /* Set interface address (and family) */
1070                 ret = -EINVAL;
1071                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1072                         break;
1073
1074                 if (!ifa) {
1075                         ret = -ENOBUFS;
1076                         ifa = inet_alloc_ifa();
1077                         if (!ifa)
1078                                 break;
1079                         INIT_HLIST_NODE(&ifa->hash);
1080                         if (colon)
1081                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1082                         else
1083                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084                 } else {
1085                         ret = 0;
1086                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1087                                 break;
1088                         inet_del_ifa(in_dev, ifap, 0);
1089                         ifa->ifa_broadcast = 0;
1090                         ifa->ifa_scope = 0;
1091                 }
1092
1093                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1094
1095                 if (!(dev->flags & IFF_POINTOPOINT)) {
1096                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1097                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1098                         if ((dev->flags & IFF_BROADCAST) &&
1099                             ifa->ifa_prefixlen < 31)
1100                                 ifa->ifa_broadcast = ifa->ifa_address |
1101                                                      ~ifa->ifa_mask;
1102                 } else {
1103                         ifa->ifa_prefixlen = 32;
1104                         ifa->ifa_mask = inet_make_mask(32);
1105                 }
1106                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1107                 ret = inet_set_ifa(dev, ifa);
1108                 break;
1109
1110         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1111                 ret = 0;
1112                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1113                         inet_del_ifa(in_dev, ifap, 0);
1114                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1115                         inet_insert_ifa(ifa);
1116                 }
1117                 break;
1118
1119         case SIOCSIFDSTADDR:    /* Set the destination address */
1120                 ret = 0;
1121                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1122                         break;
1123                 ret = -EINVAL;
1124                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1125                         break;
1126                 ret = 0;
1127                 inet_del_ifa(in_dev, ifap, 0);
1128                 ifa->ifa_address = sin->sin_addr.s_addr;
1129                 inet_insert_ifa(ifa);
1130                 break;
1131
1132         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1133
1134                 /*
1135                  *      The mask we set must be legal.
1136                  */
1137                 ret = -EINVAL;
1138                 if (bad_mask(sin->sin_addr.s_addr, 0))
1139                         break;
1140                 ret = 0;
1141                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1142                         __be32 old_mask = ifa->ifa_mask;
1143                         inet_del_ifa(in_dev, ifap, 0);
1144                         ifa->ifa_mask = sin->sin_addr.s_addr;
1145                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1146
1147                         /* See if current broadcast address matches
1148                          * with current netmask, then recalculate
1149                          * the broadcast address. Otherwise it's a
1150                          * funny address, so don't touch it since
1151                          * the user seems to know what (s)he's doing...
1152                          */
1153                         if ((dev->flags & IFF_BROADCAST) &&
1154                             (ifa->ifa_prefixlen < 31) &&
1155                             (ifa->ifa_broadcast ==
1156                              (ifa->ifa_local|~old_mask))) {
1157                                 ifa->ifa_broadcast = (ifa->ifa_local |
1158                                                       ~sin->sin_addr.s_addr);
1159                         }
1160                         inet_insert_ifa(ifa);
1161                 }
1162                 break;
1163         }
1164 done:
1165         rtnl_unlock();
1166 out:
1167         return ret;
1168 rarok:
1169         rtnl_unlock();
1170         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1171         goto out;
1172 }
1173
1174 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1175 {
1176         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1177         struct in_ifaddr *ifa;
1178         struct ifreq ifr;
1179         int done = 0;
1180
1181         if (!in_dev)
1182                 goto out;
1183
1184         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1185                 if (!buf) {
1186                         done += sizeof(ifr);
1187                         continue;
1188                 }
1189                 if (len < (int) sizeof(ifr))
1190                         break;
1191                 memset(&ifr, 0, sizeof(struct ifreq));
1192                 strcpy(ifr.ifr_name, ifa->ifa_label);
1193
1194                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1195                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1196                                                                 ifa->ifa_local;
1197
1198                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1199                         done = -EFAULT;
1200                         break;
1201                 }
1202                 buf  += sizeof(struct ifreq);
1203                 len  -= sizeof(struct ifreq);
1204                 done += sizeof(struct ifreq);
1205         }
1206 out:
1207         return done;
1208 }
1209
1210 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1211 {
1212         __be32 addr = 0;
1213         struct in_device *in_dev;
1214         struct net *net = dev_net(dev);
1215         int master_idx;
1216
1217         rcu_read_lock();
1218         in_dev = __in_dev_get_rcu(dev);
1219         if (!in_dev)
1220                 goto no_in_dev;
1221
1222         for_primary_ifa(in_dev) {
1223                 if (ifa->ifa_scope > scope)
1224                         continue;
1225                 if (!dst || inet_ifa_match(dst, ifa)) {
1226                         addr = ifa->ifa_local;
1227                         break;
1228                 }
1229                 if (!addr)
1230                         addr = ifa->ifa_local;
1231         } endfor_ifa(in_dev);
1232
1233         if (addr)
1234                 goto out_unlock;
1235 no_in_dev:
1236         master_idx = l3mdev_master_ifindex_rcu(dev);
1237
1238         /* For VRFs, the VRF device takes the place of the loopback device,
1239          * with addresses on it being preferred.  Note in such cases the
1240          * loopback device will be among the devices that fail the master_idx
1241          * equality check in the loop below.
1242          */
1243         if (master_idx &&
1244             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1245             (in_dev = __in_dev_get_rcu(dev))) {
1246                 for_primary_ifa(in_dev) {
1247                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1248                             ifa->ifa_scope <= scope) {
1249                                 addr = ifa->ifa_local;
1250                                 goto out_unlock;
1251                         }
1252                 } endfor_ifa(in_dev);
1253         }
1254
1255         /* Not loopback addresses on loopback should be preferred
1256            in this case. It is important that lo is the first interface
1257            in dev_base list.
1258          */
1259         for_each_netdev_rcu(net, dev) {
1260                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1261                         continue;
1262
1263                 in_dev = __in_dev_get_rcu(dev);
1264                 if (!in_dev)
1265                         continue;
1266
1267                 for_primary_ifa(in_dev) {
1268                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1269                             ifa->ifa_scope <= scope) {
1270                                 addr = ifa->ifa_local;
1271                                 goto out_unlock;
1272                         }
1273                 } endfor_ifa(in_dev);
1274         }
1275 out_unlock:
1276         rcu_read_unlock();
1277         return addr;
1278 }
1279 EXPORT_SYMBOL(inet_select_addr);
1280
1281 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1282                               __be32 local, int scope)
1283 {
1284         int same = 0;
1285         __be32 addr = 0;
1286
1287         for_ifa(in_dev) {
1288                 if (!addr &&
1289                     (local == ifa->ifa_local || !local) &&
1290                     ifa->ifa_scope <= scope) {
1291                         addr = ifa->ifa_local;
1292                         if (same)
1293                                 break;
1294                 }
1295                 if (!same) {
1296                         same = (!local || inet_ifa_match(local, ifa)) &&
1297                                 (!dst || inet_ifa_match(dst, ifa));
1298                         if (same && addr) {
1299                                 if (local || !dst)
1300                                         break;
1301                                 /* Is the selected addr into dst subnet? */
1302                                 if (inet_ifa_match(addr, ifa))
1303                                         break;
1304                                 /* No, then can we use new local src? */
1305                                 if (ifa->ifa_scope <= scope) {
1306                                         addr = ifa->ifa_local;
1307                                         break;
1308                                 }
1309                                 /* search for large dst subnet for addr */
1310                                 same = 0;
1311                         }
1312                 }
1313         } endfor_ifa(in_dev);
1314
1315         return same ? addr : 0;
1316 }
1317
1318 /*
1319  * Confirm that local IP address exists using wildcards:
1320  * - net: netns to check, cannot be NULL
1321  * - in_dev: only on this interface, NULL=any interface
1322  * - dst: only in the same subnet as dst, 0=any dst
1323  * - local: address, 0=autoselect the local address
1324  * - scope: maximum allowed scope value for the local address
1325  */
1326 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1327                          __be32 dst, __be32 local, int scope)
1328 {
1329         __be32 addr = 0;
1330         struct net_device *dev;
1331
1332         if (in_dev)
1333                 return confirm_addr_indev(in_dev, dst, local, scope);
1334
1335         rcu_read_lock();
1336         for_each_netdev_rcu(net, dev) {
1337                 in_dev = __in_dev_get_rcu(dev);
1338                 if (in_dev) {
1339                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1340                         if (addr)
1341                                 break;
1342                 }
1343         }
1344         rcu_read_unlock();
1345
1346         return addr;
1347 }
1348 EXPORT_SYMBOL(inet_confirm_addr);
1349
1350 /*
1351  *      Device notifier
1352  */
1353
1354 int register_inetaddr_notifier(struct notifier_block *nb)
1355 {
1356         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1357 }
1358 EXPORT_SYMBOL(register_inetaddr_notifier);
1359
1360 int unregister_inetaddr_notifier(struct notifier_block *nb)
1361 {
1362         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1363 }
1364 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1365
1366 /* Rename ifa_labels for a device name change. Make some effort to preserve
1367  * existing alias numbering and to create unique labels if possible.
1368 */
1369 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1370 {
1371         struct in_ifaddr *ifa;
1372         int named = 0;
1373
1374         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1375                 char old[IFNAMSIZ], *dot;
1376
1377                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1378                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1379                 if (named++ == 0)
1380                         goto skip;
1381                 dot = strchr(old, ':');
1382                 if (!dot) {
1383                         sprintf(old, ":%d", named);
1384                         dot = old;
1385                 }
1386                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1387                         strcat(ifa->ifa_label, dot);
1388                 else
1389                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1390 skip:
1391                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1392         }
1393 }
1394
1395 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1396                                         struct in_device *in_dev)
1397
1398 {
1399         struct in_ifaddr *ifa;
1400
1401         for (ifa = in_dev->ifa_list; ifa;
1402              ifa = ifa->ifa_next) {
1403                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1404                          ifa->ifa_local, dev,
1405                          ifa->ifa_local, NULL,
1406                          dev->dev_addr, NULL);
1407         }
1408 }
1409
1410 /* Called only under RTNL semaphore */
1411
1412 static int inetdev_event(struct notifier_block *this, unsigned long event,
1413                          void *ptr)
1414 {
1415         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1416         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1417
1418         ASSERT_RTNL();
1419
1420         if (!in_dev) {
1421                 if (event == NETDEV_REGISTER) {
1422                         in_dev = inetdev_init(dev);
1423                         if (IS_ERR(in_dev))
1424                                 return notifier_from_errno(PTR_ERR(in_dev));
1425                         if (dev->flags & IFF_LOOPBACK) {
1426                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1427                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1428                         }
1429                 } else if (event == NETDEV_CHANGEMTU) {
1430                         /* Re-enabling IP */
1431                         if (inetdev_valid_mtu(dev->mtu))
1432                                 in_dev = inetdev_init(dev);
1433                 }
1434                 goto out;
1435         }
1436
1437         switch (event) {
1438         case NETDEV_REGISTER:
1439                 pr_debug("%s: bug\n", __func__);
1440                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1441                 break;
1442         case NETDEV_UP:
1443                 if (!inetdev_valid_mtu(dev->mtu))
1444                         break;
1445                 if (dev->flags & IFF_LOOPBACK) {
1446                         struct in_ifaddr *ifa = inet_alloc_ifa();
1447
1448                         if (ifa) {
1449                                 INIT_HLIST_NODE(&ifa->hash);
1450                                 ifa->ifa_local =
1451                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1452                                 ifa->ifa_prefixlen = 8;
1453                                 ifa->ifa_mask = inet_make_mask(8);
1454                                 in_dev_hold(in_dev);
1455                                 ifa->ifa_dev = in_dev;
1456                                 ifa->ifa_scope = RT_SCOPE_HOST;
1457                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1458                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1459                                                  INFINITY_LIFE_TIME);
1460                                 ipv4_devconf_setall(in_dev);
1461                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1462                                 inet_insert_ifa(ifa);
1463                         }
1464                 }
1465                 ip_mc_up(in_dev);
1466                 /* fall through */
1467         case NETDEV_CHANGEADDR:
1468                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1469                         break;
1470                 /* fall through */
1471         case NETDEV_NOTIFY_PEERS:
1472                 /* Send gratuitous ARP to notify of link change */
1473                 inetdev_send_gratuitous_arp(dev, in_dev);
1474                 break;
1475         case NETDEV_DOWN:
1476                 ip_mc_down(in_dev);
1477                 break;
1478         case NETDEV_PRE_TYPE_CHANGE:
1479                 ip_mc_unmap(in_dev);
1480                 break;
1481         case NETDEV_POST_TYPE_CHANGE:
1482                 ip_mc_remap(in_dev);
1483                 break;
1484         case NETDEV_CHANGEMTU:
1485                 if (inetdev_valid_mtu(dev->mtu))
1486                         break;
1487                 /* disable IP when MTU is not enough */
1488         case NETDEV_UNREGISTER:
1489                 inetdev_destroy(in_dev);
1490                 break;
1491         case NETDEV_CHANGENAME:
1492                 /* Do not notify about label change, this event is
1493                  * not interesting to applications using netlink.
1494                  */
1495                 inetdev_changename(dev, in_dev);
1496
1497                 devinet_sysctl_unregister(in_dev);
1498                 devinet_sysctl_register(in_dev);
1499                 break;
1500         }
1501 out:
1502         return NOTIFY_DONE;
1503 }
1504
1505 static struct notifier_block ip_netdev_notifier = {
1506         .notifier_call = inetdev_event,
1507 };
1508
1509 static size_t inet_nlmsg_size(void)
1510 {
1511         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1512                + nla_total_size(4) /* IFA_ADDRESS */
1513                + nla_total_size(4) /* IFA_LOCAL */
1514                + nla_total_size(4) /* IFA_BROADCAST */
1515                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1516                + nla_total_size(4)  /* IFA_FLAGS */
1517                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1518 }
1519
1520 static inline u32 cstamp_delta(unsigned long cstamp)
1521 {
1522         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1523 }
1524
1525 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1526                          unsigned long tstamp, u32 preferred, u32 valid)
1527 {
1528         struct ifa_cacheinfo ci;
1529
1530         ci.cstamp = cstamp_delta(cstamp);
1531         ci.tstamp = cstamp_delta(tstamp);
1532         ci.ifa_prefered = preferred;
1533         ci.ifa_valid = valid;
1534
1535         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1536 }
1537
1538 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1539                             u32 portid, u32 seq, int event, unsigned int flags)
1540 {
1541         struct ifaddrmsg *ifm;
1542         struct nlmsghdr  *nlh;
1543         u32 preferred, valid;
1544
1545         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1546         if (!nlh)
1547                 return -EMSGSIZE;
1548
1549         ifm = nlmsg_data(nlh);
1550         ifm->ifa_family = AF_INET;
1551         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1552         ifm->ifa_flags = ifa->ifa_flags;
1553         ifm->ifa_scope = ifa->ifa_scope;
1554         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1555
1556         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1557                 preferred = ifa->ifa_preferred_lft;
1558                 valid = ifa->ifa_valid_lft;
1559                 if (preferred != INFINITY_LIFE_TIME) {
1560                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1561
1562                         if (preferred > tval)
1563                                 preferred -= tval;
1564                         else
1565                                 preferred = 0;
1566                         if (valid != INFINITY_LIFE_TIME) {
1567                                 if (valid > tval)
1568                                         valid -= tval;
1569                                 else
1570                                         valid = 0;
1571                         }
1572                 }
1573         } else {
1574                 preferred = INFINITY_LIFE_TIME;
1575                 valid = INFINITY_LIFE_TIME;
1576         }
1577         if ((ifa->ifa_address &&
1578              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1579             (ifa->ifa_local &&
1580              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1581             (ifa->ifa_broadcast &&
1582              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1583             (ifa->ifa_label[0] &&
1584              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1585             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1586             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1587                           preferred, valid))
1588                 goto nla_put_failure;
1589
1590         nlmsg_end(skb, nlh);
1591         return 0;
1592
1593 nla_put_failure:
1594         nlmsg_cancel(skb, nlh);
1595         return -EMSGSIZE;
1596 }
1597
1598 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1599 {
1600         struct net *net = sock_net(skb->sk);
1601         int h, s_h;
1602         int idx, s_idx;
1603         int ip_idx, s_ip_idx;
1604         struct net_device *dev;
1605         struct in_device *in_dev;
1606         struct in_ifaddr *ifa;
1607         struct hlist_head *head;
1608
1609         s_h = cb->args[0];
1610         s_idx = idx = cb->args[1];
1611         s_ip_idx = ip_idx = cb->args[2];
1612
1613         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1614                 idx = 0;
1615                 head = &net->dev_index_head[h];
1616                 rcu_read_lock();
1617                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1618                           net->dev_base_seq;
1619                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1620                         if (idx < s_idx)
1621                                 goto cont;
1622                         if (h > s_h || idx > s_idx)
1623                                 s_ip_idx = 0;
1624                         in_dev = __in_dev_get_rcu(dev);
1625                         if (!in_dev)
1626                                 goto cont;
1627
1628                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1629                              ifa = ifa->ifa_next, ip_idx++) {
1630                                 if (ip_idx < s_ip_idx)
1631                                         continue;
1632                                 if (inet_fill_ifaddr(skb, ifa,
1633                                              NETLINK_CB(cb->skb).portid,
1634                                              cb->nlh->nlmsg_seq,
1635                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1636                                         rcu_read_unlock();
1637                                         goto done;
1638                                 }
1639                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1640                         }
1641 cont:
1642                         idx++;
1643                 }
1644                 rcu_read_unlock();
1645         }
1646
1647 done:
1648         cb->args[0] = h;
1649         cb->args[1] = idx;
1650         cb->args[2] = ip_idx;
1651
1652         return skb->len;
1653 }
1654
1655 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1656                       u32 portid)
1657 {
1658         struct sk_buff *skb;
1659         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1660         int err = -ENOBUFS;
1661         struct net *net;
1662
1663         net = dev_net(ifa->ifa_dev->dev);
1664         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1665         if (!skb)
1666                 goto errout;
1667
1668         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1669         if (err < 0) {
1670                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1671                 WARN_ON(err == -EMSGSIZE);
1672                 kfree_skb(skb);
1673                 goto errout;
1674         }
1675         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1676         return;
1677 errout:
1678         if (err < 0)
1679                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1680 }
1681
1682 static size_t inet_get_link_af_size(const struct net_device *dev,
1683                                     u32 ext_filter_mask)
1684 {
1685         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1686
1687         if (!in_dev)
1688                 return 0;
1689
1690         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1691 }
1692
1693 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1694                              u32 ext_filter_mask)
1695 {
1696         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1697         struct nlattr *nla;
1698         int i;
1699
1700         if (!in_dev)
1701                 return -ENODATA;
1702
1703         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1704         if (!nla)
1705                 return -EMSGSIZE;
1706
1707         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1708                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1709
1710         return 0;
1711 }
1712
1713 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1714         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1715 };
1716
1717 static int inet_validate_link_af(const struct net_device *dev,
1718                                  const struct nlattr *nla)
1719 {
1720         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1721         int err, rem;
1722
1723         if (dev && !__in_dev_get_rtnl(dev))
1724                 return -EAFNOSUPPORT;
1725
1726         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1727         if (err < 0)
1728                 return err;
1729
1730         if (tb[IFLA_INET_CONF]) {
1731                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1732                         int cfgid = nla_type(a);
1733
1734                         if (nla_len(a) < 4)
1735                                 return -EINVAL;
1736
1737                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1738                                 return -EINVAL;
1739                 }
1740         }
1741
1742         return 0;
1743 }
1744
1745 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1746 {
1747         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1748         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1749         int rem;
1750
1751         if (!in_dev)
1752                 return -EAFNOSUPPORT;
1753
1754         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1755                 BUG();
1756
1757         if (tb[IFLA_INET_CONF]) {
1758                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1759                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1760         }
1761
1762         return 0;
1763 }
1764
1765 static int inet_netconf_msgsize_devconf(int type)
1766 {
1767         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1768                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1769         bool all = false;
1770
1771         if (type == NETCONFA_ALL)
1772                 all = true;
1773
1774         if (all || type == NETCONFA_FORWARDING)
1775                 size += nla_total_size(4);
1776         if (all || type == NETCONFA_RP_FILTER)
1777                 size += nla_total_size(4);
1778         if (all || type == NETCONFA_MC_FORWARDING)
1779                 size += nla_total_size(4);
1780         if (all || type == NETCONFA_PROXY_NEIGH)
1781                 size += nla_total_size(4);
1782         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1783                 size += nla_total_size(4);
1784
1785         return size;
1786 }
1787
1788 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1789                                      struct ipv4_devconf *devconf, u32 portid,
1790                                      u32 seq, int event, unsigned int flags,
1791                                      int type)
1792 {
1793         struct nlmsghdr  *nlh;
1794         struct netconfmsg *ncm;
1795         bool all = false;
1796
1797         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1798                         flags);
1799         if (!nlh)
1800                 return -EMSGSIZE;
1801
1802         if (type == NETCONFA_ALL)
1803                 all = true;
1804
1805         ncm = nlmsg_data(nlh);
1806         ncm->ncm_family = AF_INET;
1807
1808         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1809                 goto nla_put_failure;
1810
1811         if ((all || type == NETCONFA_FORWARDING) &&
1812             nla_put_s32(skb, NETCONFA_FORWARDING,
1813                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1814                 goto nla_put_failure;
1815         if ((all || type == NETCONFA_RP_FILTER) &&
1816             nla_put_s32(skb, NETCONFA_RP_FILTER,
1817                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1818                 goto nla_put_failure;
1819         if ((all || type == NETCONFA_MC_FORWARDING) &&
1820             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1821                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1822                 goto nla_put_failure;
1823         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1824             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1825                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1826                 goto nla_put_failure;
1827         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1828             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1829                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1830                 goto nla_put_failure;
1831
1832         nlmsg_end(skb, nlh);
1833         return 0;
1834
1835 nla_put_failure:
1836         nlmsg_cancel(skb, nlh);
1837         return -EMSGSIZE;
1838 }
1839
1840 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1841                                  struct ipv4_devconf *devconf)
1842 {
1843         struct sk_buff *skb;
1844         int err = -ENOBUFS;
1845
1846         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1847         if (!skb)
1848                 goto errout;
1849
1850         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1851                                         RTM_NEWNETCONF, 0, type);
1852         if (err < 0) {
1853                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1854                 WARN_ON(err == -EMSGSIZE);
1855                 kfree_skb(skb);
1856                 goto errout;
1857         }
1858         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1859         return;
1860 errout:
1861         if (err < 0)
1862                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1863 }
1864
1865 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1866         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1867         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1868         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1869         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1870         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1871 };
1872
1873 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1874                                     struct nlmsghdr *nlh)
1875 {
1876         struct net *net = sock_net(in_skb->sk);
1877         struct nlattr *tb[NETCONFA_MAX+1];
1878         struct netconfmsg *ncm;
1879         struct sk_buff *skb;
1880         struct ipv4_devconf *devconf;
1881         struct in_device *in_dev;
1882         struct net_device *dev;
1883         int ifindex;
1884         int err;
1885
1886         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1887                           devconf_ipv4_policy);
1888         if (err < 0)
1889                 goto errout;
1890
1891         err = -EINVAL;
1892         if (!tb[NETCONFA_IFINDEX])
1893                 goto errout;
1894
1895         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1896         switch (ifindex) {
1897         case NETCONFA_IFINDEX_ALL:
1898                 devconf = net->ipv4.devconf_all;
1899                 break;
1900         case NETCONFA_IFINDEX_DEFAULT:
1901                 devconf = net->ipv4.devconf_dflt;
1902                 break;
1903         default:
1904                 dev = __dev_get_by_index(net, ifindex);
1905                 if (!dev)
1906                         goto errout;
1907                 in_dev = __in_dev_get_rtnl(dev);
1908                 if (!in_dev)
1909                         goto errout;
1910                 devconf = &in_dev->cnf;
1911                 break;
1912         }
1913
1914         err = -ENOBUFS;
1915         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1916         if (!skb)
1917                 goto errout;
1918
1919         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1920                                         NETLINK_CB(in_skb).portid,
1921                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1922                                         NETCONFA_ALL);
1923         if (err < 0) {
1924                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1925                 WARN_ON(err == -EMSGSIZE);
1926                 kfree_skb(skb);
1927                 goto errout;
1928         }
1929         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1930 errout:
1931         return err;
1932 }
1933
1934 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1935                                      struct netlink_callback *cb)
1936 {
1937         struct net *net = sock_net(skb->sk);
1938         int h, s_h;
1939         int idx, s_idx;
1940         struct net_device *dev;
1941         struct in_device *in_dev;
1942         struct hlist_head *head;
1943
1944         s_h = cb->args[0];
1945         s_idx = idx = cb->args[1];
1946
1947         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1948                 idx = 0;
1949                 head = &net->dev_index_head[h];
1950                 rcu_read_lock();
1951                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1952                           net->dev_base_seq;
1953                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1954                         if (idx < s_idx)
1955                                 goto cont;
1956                         in_dev = __in_dev_get_rcu(dev);
1957                         if (!in_dev)
1958                                 goto cont;
1959
1960                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1961                                                       &in_dev->cnf,
1962                                                       NETLINK_CB(cb->skb).portid,
1963                                                       cb->nlh->nlmsg_seq,
1964                                                       RTM_NEWNETCONF,
1965                                                       NLM_F_MULTI,
1966                                                       NETCONFA_ALL) < 0) {
1967                                 rcu_read_unlock();
1968                                 goto done;
1969                         }
1970                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1971 cont:
1972                         idx++;
1973                 }
1974                 rcu_read_unlock();
1975         }
1976         if (h == NETDEV_HASHENTRIES) {
1977                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1978                                               net->ipv4.devconf_all,
1979                                               NETLINK_CB(cb->skb).portid,
1980                                               cb->nlh->nlmsg_seq,
1981                                               RTM_NEWNETCONF, NLM_F_MULTI,
1982                                               NETCONFA_ALL) < 0)
1983                         goto done;
1984                 else
1985                         h++;
1986         }
1987         if (h == NETDEV_HASHENTRIES + 1) {
1988                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1989                                               net->ipv4.devconf_dflt,
1990                                               NETLINK_CB(cb->skb).portid,
1991                                               cb->nlh->nlmsg_seq,
1992                                               RTM_NEWNETCONF, NLM_F_MULTI,
1993                                               NETCONFA_ALL) < 0)
1994                         goto done;
1995                 else
1996                         h++;
1997         }
1998 done:
1999         cb->args[0] = h;
2000         cb->args[1] = idx;
2001
2002         return skb->len;
2003 }
2004
2005 #ifdef CONFIG_SYSCTL
2006
2007 static void devinet_copy_dflt_conf(struct net *net, int i)
2008 {
2009         struct net_device *dev;
2010
2011         rcu_read_lock();
2012         for_each_netdev_rcu(net, dev) {
2013                 struct in_device *in_dev;
2014
2015                 in_dev = __in_dev_get_rcu(dev);
2016                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2017                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2018         }
2019         rcu_read_unlock();
2020 }
2021
2022 /* called with RTNL locked */
2023 static void inet_forward_change(struct net *net)
2024 {
2025         struct net_device *dev;
2026         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2027
2028         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2029         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2030         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2031                                     NETCONFA_IFINDEX_ALL,
2032                                     net->ipv4.devconf_all);
2033         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2034                                     NETCONFA_IFINDEX_DEFAULT,
2035                                     net->ipv4.devconf_dflt);
2036
2037         for_each_netdev(net, dev) {
2038                 struct in_device *in_dev;
2039
2040                 if (on)
2041                         dev_disable_lro(dev);
2042
2043                 in_dev = __in_dev_get_rtnl(dev);
2044                 if (in_dev) {
2045                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2046                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2047                                                     dev->ifindex, &in_dev->cnf);
2048                 }
2049         }
2050 }
2051
2052 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2053 {
2054         if (cnf == net->ipv4.devconf_dflt)
2055                 return NETCONFA_IFINDEX_DEFAULT;
2056         else if (cnf == net->ipv4.devconf_all)
2057                 return NETCONFA_IFINDEX_ALL;
2058         else {
2059                 struct in_device *idev
2060                         = container_of(cnf, struct in_device, cnf);
2061                 return idev->dev->ifindex;
2062         }
2063 }
2064
2065 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2066                              void __user *buffer,
2067                              size_t *lenp, loff_t *ppos)
2068 {
2069         int old_value = *(int *)ctl->data;
2070         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2071         int new_value = *(int *)ctl->data;
2072
2073         if (write) {
2074                 struct ipv4_devconf *cnf = ctl->extra1;
2075                 struct net *net = ctl->extra2;
2076                 int i = (int *)ctl->data - cnf->data;
2077                 int ifindex;
2078
2079                 set_bit(i, cnf->state);
2080
2081                 if (cnf == net->ipv4.devconf_dflt)
2082                         devinet_copy_dflt_conf(net, i);
2083                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2084                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2085                         if ((new_value == 0) && (old_value != 0))
2086                                 rt_cache_flush(net);
2087
2088                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2089                     new_value != old_value) {
2090                         ifindex = devinet_conf_ifindex(net, cnf);
2091                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2092                                                     ifindex, cnf);
2093                 }
2094                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2095                     new_value != old_value) {
2096                         ifindex = devinet_conf_ifindex(net, cnf);
2097                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2098                                                     ifindex, cnf);
2099                 }
2100                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2101                     new_value != old_value) {
2102                         ifindex = devinet_conf_ifindex(net, cnf);
2103                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2104                                                     ifindex, cnf);
2105                 }
2106         }
2107
2108         return ret;
2109 }
2110
2111 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2112                                   void __user *buffer,
2113                                   size_t *lenp, loff_t *ppos)
2114 {
2115         int *valp = ctl->data;
2116         int val = *valp;
2117         loff_t pos = *ppos;
2118         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2119
2120         if (write && *valp != val) {
2121                 struct net *net = ctl->extra2;
2122
2123                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2124                         if (!rtnl_trylock()) {
2125                                 /* Restore the original values before restarting */
2126                                 *valp = val;
2127                                 *ppos = pos;
2128                                 return restart_syscall();
2129                         }
2130                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2131                                 inet_forward_change(net);
2132                         } else {
2133                                 struct ipv4_devconf *cnf = ctl->extra1;
2134                                 struct in_device *idev =
2135                                         container_of(cnf, struct in_device, cnf);
2136                                 if (*valp)
2137                                         dev_disable_lro(idev->dev);
2138                                 inet_netconf_notify_devconf(net,
2139                                                             NETCONFA_FORWARDING,
2140                                                             idev->dev->ifindex,
2141                                                             cnf);
2142                         }
2143                         rtnl_unlock();
2144                         rt_cache_flush(net);
2145                 } else
2146                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2147                                                     NETCONFA_IFINDEX_DEFAULT,
2148                                                     net->ipv4.devconf_dflt);
2149         }
2150
2151         return ret;
2152 }
2153
2154 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2155                                 void __user *buffer,
2156                                 size_t *lenp, loff_t *ppos)
2157 {
2158         int *valp = ctl->data;
2159         int val = *valp;
2160         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2161         struct net *net = ctl->extra2;
2162
2163         if (write && *valp != val)
2164                 rt_cache_flush(net);
2165
2166         return ret;
2167 }
2168
/*
 * Helpers for building the devinet_sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY() produces one ctl_table initializer whose data
 * pointer refers to slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.
 * The remaining macros are shorthands that fix the mode and/or the handler:
 *   RW       - mode 0644, handled by devinet_conf_proc
 *   RO       - mode 0444, handled by devinet_conf_proc
 *   COMPLEX  - mode 0644, caller-supplied handler
 *   FLUSHING - mode 0644, handled by ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2191
2192 static struct devinet_sysctl_table {
2193         struct ctl_table_header *sysctl_header;
2194         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2195 } devinet_sysctl = {
2196         .devinet_vars = {
2197                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2198                                              devinet_sysctl_forward),
2199                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2200
2201                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2202                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2203                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2204                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2205                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2206                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2207                                         "accept_source_route"),
2208                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2209                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2210                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2211                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2212                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2213                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2214                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2215                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2216                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2217                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2218                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2219                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2220                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2221                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2222                                         "force_igmp_version"),
2223                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2224                                         "igmpv2_unsolicited_report_interval"),
2225                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2226                                         "igmpv3_unsolicited_report_interval"),
2227                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2228                                         "ignore_routes_with_linkdown"),
2229                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2230                                         "drop_gratuitous_arp"),
2231
2232                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2233                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2234                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2235                                               "promote_secondaries"),
2236                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2237                                               "route_localnet"),
2238                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2239                                               "drop_unicast_in_l2_multicast"),
2240         },
2241 };
2242
2243 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2244                                      int ifindex, struct ipv4_devconf *p)
2245 {
2246         int i;
2247         struct devinet_sysctl_table *t;
2248         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2249
2250         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2251         if (!t)
2252                 goto out;
2253
2254         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2255                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2256                 t->devinet_vars[i].extra1 = p;
2257                 t->devinet_vars[i].extra2 = net;
2258         }
2259
2260         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2261
2262         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2263         if (!t->sysctl_header)
2264                 goto free;
2265
2266         p->sysctl = t;
2267
2268         inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
2269         return 0;
2270
2271 free:
2272         kfree(t);
2273 out:
2274         return -ENOMEM;
2275 }
2276
2277 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2278 {
2279         struct devinet_sysctl_table *t = cnf->sysctl;
2280
2281         if (!t)
2282                 return;
2283
2284         cnf->sysctl = NULL;
2285         unregister_net_sysctl_table(t->sysctl_header);
2286         kfree(t);
2287 }
2288
2289 static int devinet_sysctl_register(struct in_device *idev)
2290 {
2291         int err;
2292
2293         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2294                 return -EINVAL;
2295
2296         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2297         if (err)
2298                 return err;
2299         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2300                                         idev->dev->ifindex, &idev->cnf);
2301         if (err)
2302                 neigh_sysctl_unregister(idev->arp_parms);
2303         return err;
2304 }
2305
2306 static void devinet_sysctl_unregister(struct in_device *idev)
2307 {
2308         __devinet_sysctl_unregister(&idev->cnf);
2309         neigh_sysctl_unregister(idev->arp_parms);
2310 }
2311
2312 static struct ctl_table ctl_forward_entry[] = {
2313         {
2314                 .procname       = "ip_forward",
2315                 .data           = &ipv4_devconf.data[
2316                                         IPV4_DEVCONF_FORWARDING - 1],
2317                 .maxlen         = sizeof(int),
2318                 .mode           = 0644,
2319                 .proc_handler   = devinet_sysctl_forward,
2320                 .extra1         = &ipv4_devconf,
2321                 .extra2         = &init_net,
2322         },
2323         { },
2324 };
2325 #endif
2326
2327 static __net_init int devinet_init_net(struct net *net)
2328 {
2329         int err;
2330         struct ipv4_devconf *all, *dflt;
2331 #ifdef CONFIG_SYSCTL
2332         struct ctl_table *tbl = ctl_forward_entry;
2333         struct ctl_table_header *forw_hdr;
2334 #endif
2335
2336         err = -ENOMEM;
2337         all = &ipv4_devconf;
2338         dflt = &ipv4_devconf_dflt;
2339
2340         if (!net_eq(net, &init_net)) {
2341                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2342                 if (!all)
2343                         goto err_alloc_all;
2344
2345                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2346                 if (!dflt)
2347                         goto err_alloc_dflt;
2348
2349 #ifdef CONFIG_SYSCTL
2350                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2351                 if (!tbl)
2352                         goto err_alloc_ctl;
2353
2354                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2355                 tbl[0].extra1 = all;
2356                 tbl[0].extra2 = net;
2357 #endif
2358         }
2359
2360 #ifdef CONFIG_SYSCTL
2361         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2362         if (err < 0)
2363                 goto err_reg_all;
2364
2365         err = __devinet_sysctl_register(net, "default",
2366                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2367         if (err < 0)
2368                 goto err_reg_dflt;
2369
2370         err = -ENOMEM;
2371         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2372         if (!forw_hdr)
2373                 goto err_reg_ctl;
2374         net->ipv4.forw_hdr = forw_hdr;
2375 #endif
2376
2377         net->ipv4.devconf_all = all;
2378         net->ipv4.devconf_dflt = dflt;
2379         return 0;
2380
2381 #ifdef CONFIG_SYSCTL
2382 err_reg_ctl:
2383         __devinet_sysctl_unregister(dflt);
2384 err_reg_dflt:
2385         __devinet_sysctl_unregister(all);
2386 err_reg_all:
2387         if (tbl != ctl_forward_entry)
2388                 kfree(tbl);
2389 err_alloc_ctl:
2390 #endif
2391         if (dflt != &ipv4_devconf_dflt)
2392                 kfree(dflt);
2393 err_alloc_dflt:
2394         if (all != &ipv4_devconf)
2395                 kfree(all);
2396 err_alloc_all:
2397         return err;
2398 }
2399
2400 static __net_exit void devinet_exit_net(struct net *net)
2401 {
2402 #ifdef CONFIG_SYSCTL
2403         struct ctl_table *tbl;
2404
2405         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2406         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2407         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2408         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2409         kfree(tbl);
2410 #endif
2411         kfree(net->ipv4.devconf_dflt);
2412         kfree(net->ipv4.devconf_all);
2413 }
2414
2415 static __net_initdata struct pernet_operations devinet_ops = {
2416         .init = devinet_init_net,
2417         .exit = devinet_exit_net,
2418 };
2419
2420 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2421         .family           = AF_INET,
2422         .fill_link_af     = inet_fill_link_af,
2423         .get_link_af_size = inet_get_link_af_size,
2424         .validate_link_af = inet_validate_link_af,
2425         .set_link_af      = inet_set_link_af,
2426 };
2427
2428 void __init devinet_init(void)
2429 {
2430         int i;
2431
2432         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2433                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2434
2435         register_pernet_subsys(&devinet_ops);
2436
2437         register_gifconf(PF_INET, inet_gifconf);
2438         register_netdevice_notifier(&ip_netdev_notifier);
2439
2440         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2441
2442         rtnl_af_register(&inet_af_ops);
2443
2444         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2445         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2446         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2447         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2448                       inet_netconf_dump_devconf, NULL);
2449 }