GNU Linux-libre 4.14.295-gnu1
[releases.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * Address flags that are only meaningful for IPv6 addresses.
 * __inet_insert_ifa() masks these bits out of ifa_flags before an IPv4
 * address is linked into a device's address list.
 */
69 #define IPV6ONLY_FLAGS  \
70                 (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
71                  IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
72                  IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
73
/*
 * Statically initialised IPv4 device configuration.  The .data array is
 * indexed by IPV4_DEVCONF_<name> - 1; entries not listed here start out
 * as zero.
 * NOTE(review): presumably the "all" configuration of the initial network
 * namespace - confirm against the per-net init code outside this chunk.
 */
74 static struct ipv4_devconf ipv4_devconf = {
75         .data = {
76                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
77                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
80                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
81                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
82         },
83 };
84
/*
 * Statically initialised "default" IPv4 device configuration.  Same
 * layout as ipv4_devconf (indexed by IPV4_DEVCONF_<name> - 1), but it
 * additionally enables ACCEPT_SOURCE_ROUTE.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into every new in_device - confirm against
 * the per-net init code outside this chunk.
 */
85 static struct ipv4_devconf ipv4_devconf_dflt = {
86         .data = {
87                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
88                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
89                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
90                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
91                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
92                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
93                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
94         },
95 };
96
/*
 * Access attribute @attr of the per-namespace default configuration,
 * i.e. the ipv4_devconf that @net->ipv4.devconf_dflt points to.
 */
97 #define IPV4_DEVCONF_DFLT(net, attr) \
98         IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
99
/*
 * Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * Addresses and flags are 32-bit values, the label is a string of at most
 * IFNAMSIZ - 1 bytes, and IFA_CACHEINFO is length-checked against
 * sizeof(struct ifa_cacheinfo).
 */
100 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
101         [IFA_LOCAL]             = { .type = NLA_U32 },
102         [IFA_ADDRESS]           = { .type = NLA_U32 },
103         [IFA_BROADCAST]         = { .type = NLA_U32 },
104         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
105         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
106         [IFA_FLAGS]             = { .type = NLA_U32 },
107 };
108
/*
 * Global hash table of configured IPv4 addresses: 1 << 8 = 256 buckets,
 * indexed by inet_addr_hash().  Entries are linked and unlinked with the
 * RCU hlist primitives under RTNL (inet_hash_insert()/inet_hash_remove())
 * and walked under rcu_read_lock() by __ip_dev_find() and
 * check_lifetime().
 */
109 #define IN4_ADDR_HSIZE_SHIFT    8
110 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
111
112 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
113
114 static u32 inet_addr_hash(const struct net *net, __be32 addr)
115 {
116         u32 val = (__force u32) addr ^ net_hash_mix(net);
117
118         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
119 }
120
121 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
122 {
123         u32 hash = inet_addr_hash(net, ifa->ifa_local);
124
125         ASSERT_RTNL();
126         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
127 }
128
129 static void inet_hash_remove(struct in_ifaddr *ifa)
130 {
131         ASSERT_RTNL();
132         hlist_del_init_rcu(&ifa->hash);
133 }
134
135 /**
136  * __ip_dev_find - find the first device with a given source address.
137  * @net: the net namespace
138  * @addr: the source address
139  * @devref: if true, take a reference on the found device
140  *
141  * If a caller uses devref=false, it should be protected by RCU, or RTNL
142  */
143 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
144 {
145         u32 hash = inet_addr_hash(net, addr);
146         struct net_device *result = NULL;
147         struct in_ifaddr *ifa;
148
149         rcu_read_lock();
150         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
151                 if (ifa->ifa_local == addr) {
152                         struct net_device *dev = ifa->ifa_dev->dev;
153
154                         if (!net_eq(dev_net(dev), net))
155                                 continue;
156                         result = dev;
157                         break;
158                 }
159         }
160         if (!result) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         }
174         if (result && devref)
175                 dev_hold(result);
176         rcu_read_unlock();
177         return result;
178 }
179 EXPORT_SYMBOL(__ip_dev_find);
180
/* Forward declaration: used by the address add/delete paths below. */
181 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
182
/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is added, promoted or removed.
 * inetaddr_validator_chain is called with NETDEV_UP and a
 * struct in_validator_info before a new address is committed; an error
 * from it makes __inet_insert_ifa() reject the address.
 */
183 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
185 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
186                          int destroy);
/*
 * Per-device sysctl registration hooks.  Without CONFIG_SYSCTL they are
 * stubs: registration reports success and unregistration does nothing.
 */
187 #ifdef CONFIG_SYSCTL
188 static int devinet_sysctl_register(struct in_device *idev);
189 static void devinet_sysctl_unregister(struct in_device *idev);
190 #else
191 static int devinet_sysctl_register(struct in_device *idev)
192 {
193         return 0;
194 }
195 static void devinet_sysctl_unregister(struct in_device *idev)
196 {
197 }
198 #endif
199
200 /* Locks all the inet devices. */
201
202 static struct in_ifaddr *inet_alloc_ifa(void)
203 {
204         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
205 }
206
207 static void inet_rcu_free_ifa(struct rcu_head *head)
208 {
209         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
210         if (ifa->ifa_dev)
211                 in_dev_put(ifa->ifa_dev);
212         kfree(ifa);
213 }
214
215 static void inet_free_ifa(struct in_ifaddr *ifa)
216 {
217         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
218 }
219
220 void in_dev_finish_destroy(struct in_device *idev)
221 {
222         struct net_device *dev = idev->dev;
223
224         WARN_ON(idev->ifa_list);
225         WARN_ON(idev->mc_list);
226         kfree(rcu_dereference_protected(idev->mc_hash, 1));
227 #ifdef NET_REFCNT_DEBUG
228         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
229 #endif
230         dev_put(dev);
231         if (!idev->dead)
232                 pr_err("Freeing alive in_device %p\n", idev);
233         else
234                 kfree(idev);
235 }
236 EXPORT_SYMBOL(in_dev_finish_destroy);
237
238 static struct in_device *inetdev_init(struct net_device *dev)
239 {
240         struct in_device *in_dev;
241         int err = -ENOMEM;
242
243         ASSERT_RTNL();
244
245         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
246         if (!in_dev)
247                 goto out;
248         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
249                         sizeof(in_dev->cnf));
250         in_dev->cnf.sysctl = NULL;
251         in_dev->dev = dev;
252         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
253         if (!in_dev->arp_parms)
254                 goto out_kfree;
255         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
256                 dev_disable_lro(dev);
257         /* Reference in_dev->dev */
258         dev_hold(dev);
259         /* Account for reference dev->ip_ptr (below) */
260         refcount_set(&in_dev->refcnt, 1);
261
262         err = devinet_sysctl_register(in_dev);
263         if (err) {
264                 in_dev->dead = 1;
265                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
/*
 * __inet_del_ifa - unlink an address from its device and announce it
 * @in_dev: device the address belongs to
 * @ifap: the list link (in_dev->ifa_list or a predecessor's ->ifa_next)
 *        that currently points at the address to remove
 * @destroy: if non-zero, release the removed address via inet_free_ifa()
 * @nlh: netlink header passed through to rtmsg_ifa()
 * @portid: netlink port id passed through to rtmsg_ifa()
 *
 * Caller must hold RTNL.  When a primary address is removed, secondaries
 * with the same mask in its subnet are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first one is promoted to
 * primary.  This secondary handling is skipped when the device is
 * already marked dead.
 */
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion of all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary after ifa1 whose scope is
                         * not below ifa1's; a promoted address is later
                         * re-inserted right behind it.
                         */
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
                        /* Not a secondary of ifa1's subnet: step over it and
                         * remember it as predecessor of a later candidate.
                         */
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
366                         if (!do_promote) {
                                /* No promotion: delete this secondary too. */
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
                                /* First matching secondary gets promoted. */
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
                /* Saved before the relinking below changes promote->ifa_next. */
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
                /* Unlink promote from its current position and re-insert
                 * it directly after last_prim.  With no predecessor
                 * recorded it is left where it is.
                 */
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
                /* Re-add the routes of the remaining secondaries of this
                 * subnet that were removed before the unlink above.
                 */
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/* Address lifetime worker; the definition follows further down. */
442 static void check_lifetime(struct work_struct *work);
443
/*
 * Delayed work item that runs check_lifetime().  __inet_insert_ifa()
 * re-queues it with zero delay whenever an address is added, and
 * check_lifetime() re-arms it for the next expiry.
 */
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
/*
 * __inet_insert_ifa - link an address into its device and announce it
 * @ifa: address to insert; ifa->ifa_dev selects the device.  On every
 *       path that does not link it, it is released via inet_free_ifa().
 * @nlh: netlink header passed through to rtmsg_ifa()
 * @portid: netlink port id passed through to rtmsg_ifa()
 *
 * Caller must hold RTNL.
 *
 * Returns 0 on success, and also when ifa->ifa_local is zero (nothing is
 * inserted); -EEXIST if the same local address already exists in the same
 * subnet; -EINVAL if an existing address of that subnet has a different
 * scope; or the errno a validator on inetaddr_validator_chain returned.
 */
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451         struct in_validator_info ivi;
452         int ret;
453
454         ASSERT_RTNL();
455
456         if (!ifa->ifa_local) {
457                 inet_free_ifa(ifa);
458                 return 0;
459         }
460
        /* Start out as primary; the scan below may demote to secondary. */
461         ifa->ifa_flags &= ~IFA_F_SECONDARY;
462         last_primary = &in_dev->ifa_list;
463
464         /* Don't set IPv6 only flags to IPv4 addresses */
465         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
466
        /* Walk the whole list.  last_primary ends up as the link after
         * the last primary whose scope is not below the new address's
         * scope; ifap ends up as the tail link of the list.
         */
467         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
468              ifap = &ifa1->ifa_next) {
469                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
470                     ifa->ifa_scope <= ifa1->ifa_scope)
471                         last_primary = &ifa1->ifa_next;
                /* An existing address with the same mask in the same
                 * subnet makes the new one a secondary.
                 */
472                 if (ifa1->ifa_mask == ifa->ifa_mask &&
473                     inet_ifa_match(ifa1->ifa_address, ifa)) {
474                         if (ifa1->ifa_local == ifa->ifa_local) {
475                                 inet_free_ifa(ifa);
476                                 return -EEXIST;
477                         }
478                         if (ifa1->ifa_scope != ifa->ifa_scope) {
479                                 inet_free_ifa(ifa);
480                                 return -EINVAL;
481                         }
482                         ifa->ifa_flags |= IFA_F_SECONDARY;
483                 }
484         }
485
486         /* Allow any devices that wish to register ifaddr validators to weigh
487          * in now, before changes are committed.  The rtnl lock is serializing
488          * access here, so the state should not change between a validator call
489          * and a final notify on commit.  This isn't invoked on promotion under
490          * the assumption that validators are checking the address itself, and
491          * not the flags.
492          */
493         ivi.ivi_addr = ifa->ifa_address;
494         ivi.ivi_dev = ifa->ifa_dev;
495         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
496                                            NETDEV_UP, &ivi);
497         ret = notifier_to_errno(ret);
498         if (ret) {
499                 inet_free_ifa(ifa);
500                 return ret;
501         }
502
        /* Primaries go after the last suitable primary; secondaries keep
         * ifap from the scan above, i.e. they are appended at the tail.
         */
503         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
504                 prandom_seed((__force u32) ifa->ifa_local);
505                 ifap = last_primary;
506         }
507
508         ifa->ifa_next = *ifap;
509         *ifap = ifa;
510
511         inet_hash_insert(dev_net(in_dev->dev), ifa);
512
        /* Re-queue the lifetime worker with zero delay. */
513         cancel_delayed_work(&check_lifetime_work);
514         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
515
516         /* Send message first, then call notifier.
517            Notifier will trigger FIB update, so that
518            listeners of netlink will know about new ifaddr */
519         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
520         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
521
522         return 0;
523 }
524
525 static int inet_insert_ifa(struct in_ifaddr *ifa)
526 {
527         return __inet_insert_ifa(ifa, NULL, 0);
528 }
529
530 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
531 {
532         struct in_device *in_dev = __in_dev_get_rtnl(dev);
533
534         ASSERT_RTNL();
535
536         if (!in_dev) {
537                 inet_free_ifa(ifa);
538                 return -ENOBUFS;
539         }
540         ipv4_devconf_setall(in_dev);
541         neigh_parms_data_state_setall(in_dev->arp_parms);
542         if (ifa->ifa_dev != in_dev) {
543                 WARN_ON(ifa->ifa_dev);
544                 in_dev_hold(in_dev);
545                 ifa->ifa_dev = in_dev;
546         }
547         if (ipv4_is_loopback(ifa->ifa_local))
548                 ifa->ifa_scope = RT_SCOPE_HOST;
549         return inet_insert_ifa(ifa);
550 }
551
552 /* Caller must hold RCU or RTNL :
553  * We dont take a reference on found in_device
554  */
555 struct in_device *inetdev_by_index(struct net *net, int ifindex)
556 {
557         struct net_device *dev;
558         struct in_device *in_dev = NULL;
559
560         rcu_read_lock();
561         dev = dev_get_by_index_rcu(net, ifindex);
562         if (dev)
563                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
564         rcu_read_unlock();
565         return in_dev;
566 }
567 EXPORT_SYMBOL(inetdev_by_index);
568
569 /* Called only from RTNL semaphored context. No locks. */
570
571 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
572                                     __be32 mask)
573 {
574         ASSERT_RTNL();
575
576         for_primary_ifa(in_dev) {
577                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
578                         return ifa;
579         } endfor_ifa(in_dev);
580         return NULL;
581 }
582
583 static int ip_mc_autojoin_config(struct net *net, bool join,
584                                  const struct in_ifaddr *ifa)
585 {
586 #if defined(CONFIG_IP_MULTICAST)
587         struct ip_mreqn mreq = {
588                 .imr_multiaddr.s_addr = ifa->ifa_address,
589                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
590         };
591         struct sock *sk = net->ipv4.mc_autojoin_sk;
592         int ret;
593
594         ASSERT_RTNL();
595
596         lock_sock(sk);
597         if (join)
598                 ret = ip_mc_join_group(sk, &mreq);
599         else
600                 ret = ip_mc_leave_group(sk, &mreq);
601         release_sock(sk);
602
603         return ret;
604 #else
605         return -EOPNOTSUPP;
606 #endif
607 }
608
609 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
610                             struct netlink_ext_ack *extack)
611 {
612         struct net *net = sock_net(skb->sk);
613         struct nlattr *tb[IFA_MAX+1];
614         struct in_device *in_dev;
615         struct ifaddrmsg *ifm;
616         struct in_ifaddr *ifa, **ifap;
617         int err = -EINVAL;
618
619         ASSERT_RTNL();
620
621         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
622                           extack);
623         if (err < 0)
624                 goto errout;
625
626         ifm = nlmsg_data(nlh);
627         in_dev = inetdev_by_index(net, ifm->ifa_index);
628         if (!in_dev) {
629                 err = -ENODEV;
630                 goto errout;
631         }
632
633         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
634              ifap = &ifa->ifa_next) {
635                 if (tb[IFA_LOCAL] &&
636                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
637                         continue;
638
639                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
640                         continue;
641
642                 if (tb[IFA_ADDRESS] &&
643                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
644                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
645                         continue;
646
647                 if (ipv4_is_multicast(ifa->ifa_address))
648                         ip_mc_autojoin_config(net, false, ifa);
649                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
650                 return 0;
651         }
652
653         err = -EADDRNOTAVAIL;
654 errout:
655         return err;
656 }
657
/*
 * Lifetime value meaning "never expires": check_lifetime() skips the
 * expiry test for a valid or preferred lifetime equal to this value.
 * Also the default for both lifetimes in inet_rtm_newaddr().
 */
658 #define INFINITY_LIFE_TIME      0xFFFFFFFF
659
/*
 * Delayed-work handler enforcing address lifetimes.
 *
 * Walks every bucket of inet_addr_lst[].  Non-permanent addresses whose
 * valid lifetime has elapsed are deleted; those past their preferred
 * lifetime are flagged IFA_F_DEPRECATED and announced with RTM_NEWADDR.
 * Each bucket is first scanned under rcu_read_lock() only; RTNL is taken
 * for a second, modifying pass only if that scan found work to do.
 * Finally the work re-queues itself for the earliest upcoming deadline,
 * at most ADDR_CHECK_FREQUENCY (rounded up) from now and never sooner
 * than ADDRCONF_TIMER_FUZZ_MAX.
 */
660 static void check_lifetime(struct work_struct *work)
661 {
662         unsigned long now, next, next_sec, next_sched;
663         struct in_ifaddr *ifa;
664         struct hlist_node *n;
665         int i;
666
667         now = jiffies;
668         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
669
670         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
671                 bool change_needed = false;
672
                /* Pass 1 (RCU only): detect pending changes in this
                 * bucket and pull 'next' forward to the earliest
                 * upcoming deadline.
                 */
673                 rcu_read_lock();
674                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
675                         unsigned long age;
676
677                         if (ifa->ifa_flags & IFA_F_PERMANENT)
678                                 continue;
679
680                         /* We try to batch several events at once. */
681                         age = (now - ifa->ifa_tstamp +
682                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
683
684                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
685                             age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime elapsed: must be deleted. */
686                                 change_needed = true;
687                         } else if (ifa->ifa_preferred_lft ==
688                                    INFINITY_LIFE_TIME) {
689                                 continue;
690                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Past the preferred lifetime: the next
                                 * deadline is the end of the valid lifetime.
                                 */
691                                 if (time_before(ifa->ifa_tstamp +
692                                                 ifa->ifa_valid_lft * HZ, next))
693                                         next = ifa->ifa_tstamp +
694                                                ifa->ifa_valid_lft * HZ;
695
696                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
697                                         change_needed = true;
698                         } else if (time_before(ifa->ifa_tstamp +
699                                                ifa->ifa_preferred_lft * HZ,
700                                                next)) {
                                /* Still preferred: next deadline is the end
                                 * of the preferred lifetime.
                                 */
701                                 next = ifa->ifa_tstamp +
702                                        ifa->ifa_preferred_lft * HZ;
703                         }
704                 }
705                 rcu_read_unlock();
706                 if (!change_needed)
707                         continue;
                /* Pass 2 (RTNL): re-check each entry and apply the change.
                 * The _safe iterator is used because entries may be
                 * removed from the bucket while walking it.
                 */
708                 rtnl_lock();
709                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
710                         unsigned long age;
711
712                         if (ifa->ifa_flags & IFA_F_PERMANENT)
713                                 continue;
714
715                         /* We try to batch several events at once. */
716                         age = (now - ifa->ifa_tstamp +
717                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
718
719                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
720                             age >= ifa->ifa_valid_lft) {
721                                 struct in_ifaddr **ifap;
722
                                /* inet_del_ifa() needs the list link that
                                 * points at ifa, so search the device list.
                                 */
723                                 for (ifap = &ifa->ifa_dev->ifa_list;
724                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
725                                         if (*ifap == ifa) {
726                                                 inet_del_ifa(ifa->ifa_dev,
727                                                              ifap, 1);
728                                                 break;
729                                         }
730                                 }
731                         } else if (ifa->ifa_preferred_lft !=
732                                    INFINITY_LIFE_TIME &&
733                                    age >= ifa->ifa_preferred_lft &&
734                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
735                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
736                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
737                         }
738                 }
739                 rtnl_unlock();
740         }
741
742         next_sec = round_jiffies_up(next);
743         next_sched = next;
744
745         /* If rounded timeout is accurate enough, accept it. */
746         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
747                 next_sched = next_sec;
748
749         now = jiffies;
750         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
751         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
752                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
753
754         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
755                         next_sched - now);
756 }
757
758 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
759                              __u32 prefered_lft)
760 {
761         unsigned long timeout;
762
763         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
764
765         timeout = addrconf_timeout_fixup(valid_lft, HZ);
766         if (addrconf_finite_timeout(timeout))
767                 ifa->ifa_valid_lft = timeout;
768         else
769                 ifa->ifa_flags |= IFA_F_PERMANENT;
770
771         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
772         if (addrconf_finite_timeout(timeout)) {
773                 if (timeout == 0)
774                         ifa->ifa_flags |= IFA_F_DEPRECATED;
775                 ifa->ifa_preferred_lft = timeout;
776         }
777         ifa->ifa_tstamp = jiffies;
778         if (!ifa->ifa_cstamp)
779                 ifa->ifa_cstamp = ifa->ifa_tstamp;
780 }
781
782 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
783                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
784 {
785         struct nlattr *tb[IFA_MAX+1];
786         struct in_ifaddr *ifa;
787         struct ifaddrmsg *ifm;
788         struct net_device *dev;
789         struct in_device *in_dev;
790         int err;
791
792         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
793                           NULL);
794         if (err < 0)
795                 goto errout;
796
797         ifm = nlmsg_data(nlh);
798         err = -EINVAL;
799         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
800                 goto errout;
801
802         dev = __dev_get_by_index(net, ifm->ifa_index);
803         err = -ENODEV;
804         if (!dev)
805                 goto errout;
806
807         in_dev = __in_dev_get_rtnl(dev);
808         err = -ENOBUFS;
809         if (!in_dev)
810                 goto errout;
811
812         ifa = inet_alloc_ifa();
813         if (!ifa)
814                 /*
815                  * A potential indev allocation can be left alive, it stays
816                  * assigned to its device and is destroy with it.
817                  */
818                 goto errout;
819
820         ipv4_devconf_setall(in_dev);
821         neigh_parms_data_state_setall(in_dev->arp_parms);
822         in_dev_hold(in_dev);
823
824         if (!tb[IFA_ADDRESS])
825                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
826
827         INIT_HLIST_NODE(&ifa->hash);
828         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
829         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
830         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
831                                          ifm->ifa_flags;
832         ifa->ifa_scope = ifm->ifa_scope;
833         ifa->ifa_dev = in_dev;
834
835         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
836         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
837
838         if (tb[IFA_BROADCAST])
839                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
840
841         if (tb[IFA_LABEL])
842                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
843         else
844                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
845
846         if (tb[IFA_CACHEINFO]) {
847                 struct ifa_cacheinfo *ci;
848
849                 ci = nla_data(tb[IFA_CACHEINFO]);
850                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
851                         err = -EINVAL;
852                         goto errout_free;
853                 }
854                 *pvalid_lft = ci->ifa_valid;
855                 *pprefered_lft = ci->ifa_prefered;
856         }
857
858         return ifa;
859
860 errout_free:
861         inet_free_ifa(ifa);
862 errout:
863         return ERR_PTR(err);
864 }
865
866 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
867 {
868         struct in_device *in_dev = ifa->ifa_dev;
869         struct in_ifaddr *ifa1, **ifap;
870
871         if (!ifa->ifa_local)
872                 return NULL;
873
874         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
875              ifap = &ifa1->ifa_next) {
876                 if (ifa1->ifa_mask == ifa->ifa_mask &&
877                     inet_ifa_match(ifa1->ifa_address, ifa) &&
878                     ifa1->ifa_local == ifa->ifa_local)
879                         return ifa1;
880         }
881         return NULL;
882 }
883
884 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
885                             struct netlink_ext_ack *extack)
886 {
887         struct net *net = sock_net(skb->sk);
888         struct in_ifaddr *ifa;
889         struct in_ifaddr *ifa_existing;
890         __u32 valid_lft = INFINITY_LIFE_TIME;
891         __u32 prefered_lft = INFINITY_LIFE_TIME;
892
893         ASSERT_RTNL();
894
895         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
896         if (IS_ERR(ifa))
897                 return PTR_ERR(ifa);
898
899         ifa_existing = find_matching_ifa(ifa);
900         if (!ifa_existing) {
901                 /* It would be best to check for !NLM_F_CREATE here but
902                  * userspace already relies on not having to provide this.
903                  */
904                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
905                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
906                         int ret = ip_mc_autojoin_config(net, true, ifa);
907
908                         if (ret < 0) {
909                                 inet_free_ifa(ifa);
910                                 return ret;
911                         }
912                 }
913                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
914         } else {
915                 inet_free_ifa(ifa);
916
917                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
918                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
919                         return -EEXIST;
920                 ifa = ifa_existing;
921                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
922                 cancel_delayed_work(&check_lifetime_work);
923                 queue_delayed_work(system_power_efficient_wq,
924                                 &check_lifetime_work, 0);
925                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
926         }
927         return 0;
928 }
929
930 /*
931  *      Determine a default network mask, based on the IP address.
932  */
933
934 static int inet_abc_len(__be32 addr)
935 {
936         int rc = -1;    /* Something else, probably a multicast. */
937
938         if (ipv4_is_zeronet(addr))
939                 rc = 0;
940         else {
941                 __u32 haddr = ntohl(addr);
942
943                 if (IN_CLASSA(haddr))
944                         rc = 8;
945                 else if (IN_CLASSB(haddr))
946                         rc = 16;
947                 else if (IN_CLASSC(haddr))
948                         rc = 24;
949         }
950
951         return rc;
952 }
953
954
/*
 *      Handle the IPv4 interface-address ioctls.
 *
 *      @net:   namespace used for the device lookup; its user_ns is the
 *              one checked for CAP_NET_ADMIN on the SIOCSIF* commands
 *      @cmd:   SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *              SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *              SIOCSIFNETMASK; any other value yields -EINVAL
 *      @arg:   user pointer to a struct ifreq.  It is copied in on entry
 *              and, for the SIOCGIF* commands only, copied back with the
 *              requested address stored in ifr_addr
 *
 *      The address operated on is selected by label (ifr_name, which may
 *      be an alias of the form "dev:suffix") and, for the SIOCGIF*
 *      commands with an AF_INET address supplied, by label plus local
 *      address first.
 *
 *      Returns 0 on success or a negative errno: -EFAULT (user copy
 *      failed), -EPERM, -EINVAL, -ENODEV, -EADDRNOTAVAIL, -ENOBUFS, or
 *      the result of dev_change_flags() / inet_set_ifa().
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct ifreq ifr;
        struct sockaddr_in sin_orig;
        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
        struct in_device *in_dev;
        struct in_ifaddr **ifap = NULL;
        struct in_ifaddr *ifa = NULL;
        struct net_device *dev;
        char *colon;
        int ret = -EFAULT;
        int tryaddrmatch = 0;

        /*
         *      Fetch the caller's info block into kernel space
         */

        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
                goto out;
        /* The name came from user space: force NUL termination. */
        ifr.ifr_name[IFNAMSIZ - 1] = 0;

        /* save original address for comparison */
        memcpy(&sin_orig, sin, sizeof(*sin));

        /*
         * Cut an alias label down to the bare device name for dev_load()
         * and the device lookup; the ':' is put back once the device has
         * been found.
         */
        colon = strchr(ifr.ifr_name, ':');
        if (colon)
                *colon = 0;

        dev_load(net, ifr.ifr_name);

        /* First pass over cmd: validation and permission checks only. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
        case SIOCGIFBRDADDR:    /* Get the broadcast address */
        case SIOCGIFDSTADDR:    /* Get the destination address */
        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                /* Note that these ioctls will not sleep,
                   so that we do not impose a lock.
                   One day we will be forced to put shlock here (I mean SMP)
                 */
                tryaddrmatch = (sin_orig.sin_family == AF_INET);
                /* Start the reply from a clean AF_INET sockaddr. */
                memset(sin, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                break;

        case SIOCSIFFLAGS:
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                break;
        case SIOCSIFADDR:       /* Set interface address (and family) */
        case SIOCSIFBRDADDR:    /* Set the broadcast address */
        case SIOCSIFDSTADDR:    /* Set the destination address */
        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
                ret = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto out;
                ret = -EINVAL;
                if (sin->sin_family != AF_INET)
                        goto out;
                break;
        default:
                ret = -EINVAL;
                goto out;
        }

        /* Everything from here to done:/rarok: runs under RTNL. */
        rtnl_lock();

        ret = -ENODEV;
        dev = __dev_get_by_name(net, ifr.ifr_name);
        if (!dev)
                goto done;

        /* Restore the full label for the comparisons below. */
        if (colon)
                *colon = ':';

        in_dev = __in_dev_get_rtnl(dev);
        if (in_dev) {
                if (tryaddrmatch) {
                        /* Matthias Andree */
                        /* compare label and address (4.4BSD style) */
                        /* note: we only do this for a limited set of ioctls
                           and only if the original address family was AF_INET.
                           This is checked above. */
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next) {
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
                                    sin_orig.sin_addr.s_addr ==
                                                        ifa->ifa_local) {
                                        break; /* found */
                                }
                        }
                }
                /* we didn't get a match, maybe the application is
                   4.3BSD-style and passed in junk so we fall back to
                   comparing just the label */
                if (!ifa) {
                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
                             ifap = &ifa->ifa_next)
                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
                                        break;
                }
        }

        /*
         * Without a matching address only SIOCSIFADDR (which allocates a
         * new one) and SIOCSIFFLAGS (which checks ifa itself) may go on.
         * When ifa is non-NULL, ifap points at the link that holds it.
         */
        ret = -EADDRNOTAVAIL;
        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
                goto done;

        /* Second pass over cmd: perform the operation. */
        switch (cmd) {
        case SIOCGIFADDR:       /* Get interface address */
                sin->sin_addr.s_addr = ifa->ifa_local;
                goto rarok;

        case SIOCGIFBRDADDR:    /* Get the broadcast address */
                sin->sin_addr.s_addr = ifa->ifa_broadcast;
                goto rarok;

        case SIOCGIFDSTADDR:    /* Get the destination address */
                sin->sin_addr.s_addr = ifa->ifa_address;
                goto rarok;

        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
                sin->sin_addr.s_addr = ifa->ifa_mask;
                goto rarok;

        case SIOCSIFFLAGS:
                if (colon) {
                        /*
                         * Flags on an alias label apply to that address:
                         * clearing IFF_UP deletes it, anything else is
                         * accepted without further action.
                         */
                        ret = -EADDRNOTAVAIL;
                        if (!ifa)
                                break;
                        ret = 0;
                        if (!(ifr.ifr_flags & IFF_UP))
                                inet_del_ifa(in_dev, ifap, 1);
                        break;
                }
                ret = dev_change_flags(dev, ifr.ifr_flags);
                break;

        case SIOCSIFADDR:       /* Set interface address (and family) */
                /* Reject addresses that have no classful default mask. */
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;

                if (!ifa) {
                        ret = -ENOBUFS;
                        ifa = inet_alloc_ifa();
                        if (!ifa)
                                break;
                        INIT_HLIST_NODE(&ifa->hash);
                        if (colon)
                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
                        else
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                } else {
                        /* Same address already set: nothing to do. */
                        ret = 0;
                        if (ifa->ifa_local == sin->sin_addr.s_addr)
                                break;
                        /*
                         * NOTE(review): the third argument 0 presumably
                         * means "unlink but do not free", since ifa is
                         * reused below - confirm against inet_del_ifa().
                         */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = 0;
                        ifa->ifa_scope = 0;
                }

                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

                if (!(dev->flags & IFF_POINTOPOINT)) {
                        /* Derive mask and broadcast from the address class. */
                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
                        if ((dev->flags & IFF_BROADCAST) &&
                            ifa->ifa_prefixlen < 31)
                                ifa->ifa_broadcast = ifa->ifa_address |
                                                     ~ifa->ifa_mask;
                } else {
                        ifa->ifa_prefixlen = 32;
                        ifa->ifa_mask = inet_make_mask(32);
                }
                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
                ret = inet_set_ifa(dev, ifa);
                break;

        case SIOCSIFBRDADDR:    /* Set the broadcast address */
                ret = 0;
                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
                        /* Remove, modify, then insert the same ifa again. */
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        inet_insert_ifa(ifa);
                }
                break;

        case SIOCSIFDSTADDR:    /* Set the destination address */
                ret = 0;
                if (ifa->ifa_address == sin->sin_addr.s_addr)
                        break;
                ret = -EINVAL;
                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
                        break;
                ret = 0;
                inet_del_ifa(in_dev, ifap, 0);
                ifa->ifa_address = sin->sin_addr.s_addr;
                inet_insert_ifa(ifa);
                break;

        case SIOCSIFNETMASK:    /* Set the netmask for the interface */

                /*
                 *      The mask we set must be legal.
                 */
                ret = -EINVAL;
                if (bad_mask(sin->sin_addr.s_addr, 0))
                        break;
                ret = 0;
                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
                        __be32 old_mask = ifa->ifa_mask;
                        inet_del_ifa(in_dev, ifap, 0);
                        ifa->ifa_mask = sin->sin_addr.s_addr;
                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

                        /* See if current broadcast address matches
                         * with current netmask, then recalculate
                         * the broadcast address. Otherwise it's a
                         * funny address, so don't touch it since
                         * the user seems to know what (s)he's doing...
                         */
                        if ((dev->flags & IFF_BROADCAST) &&
                            (ifa->ifa_prefixlen < 31) &&
                            (ifa->ifa_broadcast ==
                             (ifa->ifa_local|~old_mask))) {
                                ifa->ifa_broadcast = (ifa->ifa_local |
                                                      ~sin->sin_addr.s_addr);
                        }
                        inet_insert_ifa(ifa);
                }
                break;
        }
done:
        rtnl_unlock();
out:
        return ret;
rarok:
        /* SIOCGIF* success path: drop RTNL, then copy the reply out. */
        rtnl_unlock();
        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
        goto out;
}
1196
1197 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1198 {
1199         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1200         struct in_ifaddr *ifa;
1201         struct ifreq ifr;
1202         int done = 0;
1203
1204         if (!in_dev)
1205                 goto out;
1206
1207         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1208                 if (!buf) {
1209                         done += sizeof(ifr);
1210                         continue;
1211                 }
1212                 if (len < (int) sizeof(ifr))
1213                         break;
1214                 memset(&ifr, 0, sizeof(struct ifreq));
1215                 strcpy(ifr.ifr_name, ifa->ifa_label);
1216
1217                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1218                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1219                                                                 ifa->ifa_local;
1220
1221                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1222                         done = -EFAULT;
1223                         break;
1224                 }
1225                 buf  += sizeof(struct ifreq);
1226                 len  -= sizeof(struct ifreq);
1227                 done += sizeof(struct ifreq);
1228         }
1229 out:
1230         return done;
1231 }
1232
1233 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1234                                  int scope)
1235 {
1236         for_primary_ifa(in_dev) {
1237                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1238                     ifa->ifa_scope <= scope)
1239                         return ifa->ifa_local;
1240         } endfor_ifa(in_dev);
1241
1242         return 0;
1243 }
1244
1245 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1246 {
1247         __be32 addr = 0;
1248         struct in_device *in_dev;
1249         struct net *net = dev_net(dev);
1250         int master_idx;
1251
1252         rcu_read_lock();
1253         in_dev = __in_dev_get_rcu(dev);
1254         if (!in_dev)
1255                 goto no_in_dev;
1256
1257         for_primary_ifa(in_dev) {
1258                 if (ifa->ifa_scope > scope)
1259                         continue;
1260                 if (!dst || inet_ifa_match(dst, ifa)) {
1261                         addr = ifa->ifa_local;
1262                         break;
1263                 }
1264                 if (!addr)
1265                         addr = ifa->ifa_local;
1266         } endfor_ifa(in_dev);
1267
1268         if (addr)
1269                 goto out_unlock;
1270 no_in_dev:
1271         master_idx = l3mdev_master_ifindex_rcu(dev);
1272
1273         /* For VRFs, the VRF device takes the place of the loopback device,
1274          * with addresses on it being preferred.  Note in such cases the
1275          * loopback device will be among the devices that fail the master_idx
1276          * equality check in the loop below.
1277          */
1278         if (master_idx &&
1279             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1280             (in_dev = __in_dev_get_rcu(dev))) {
1281                 addr = in_dev_select_addr(in_dev, scope);
1282                 if (addr)
1283                         goto out_unlock;
1284         }
1285
1286         /* Not loopback addresses on loopback should be preferred
1287            in this case. It is important that lo is the first interface
1288            in dev_base list.
1289          */
1290         for_each_netdev_rcu(net, dev) {
1291                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1292                         continue;
1293
1294                 in_dev = __in_dev_get_rcu(dev);
1295                 if (!in_dev)
1296                         continue;
1297
1298                 addr = in_dev_select_addr(in_dev, scope);
1299                 if (addr)
1300                         goto out_unlock;
1301         }
1302 out_unlock:
1303         rcu_read_unlock();
1304         return addr;
1305 }
1306 EXPORT_SYMBOL(inet_select_addr);
1307
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things at once:
 *  - addr: the first local address that equals @local (or any address
 *    when @local is 0) and whose scope is <= @scope;
 *  - same: whether some address's subnet contains @local (if non-zero)
 *    and @dst (if non-zero), as judged by inet_ifa_match().
 *
 * Returns addr when a subnet match was found (same != 0), otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* Candidate source address: take the first one only. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet already confirmed earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /* Explicit local, or no dst constraint. */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1344
1345 /*
1346  * Confirm that local IP address exists using wildcards:
1347  * - net: netns to check, cannot be NULL
1348  * - in_dev: only on this interface, NULL=any interface
1349  * - dst: only in the same subnet as dst, 0=any dst
1350  * - local: address, 0=autoselect the local address
1351  * - scope: maximum allowed scope value for the local address
1352  */
1353 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1354                          __be32 dst, __be32 local, int scope)
1355 {
1356         __be32 addr = 0;
1357         struct net_device *dev;
1358
1359         if (in_dev)
1360                 return confirm_addr_indev(in_dev, dst, local, scope);
1361
1362         rcu_read_lock();
1363         for_each_netdev_rcu(net, dev) {
1364                 in_dev = __in_dev_get_rcu(dev);
1365                 if (in_dev) {
1366                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1367                         if (addr)
1368                                 break;
1369                 }
1370         }
1371         rcu_read_unlock();
1372
1373         return addr;
1374 }
1375 EXPORT_SYMBOL(inet_confirm_addr);
1376
/*
 *      Device notifier
 */

/*
 * Add @nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_notifier);
1386
/*
 * Remove @nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
}
EXPORT_SYMBOL(unregister_inetaddr_notifier);
1392
/*
 * Add @nb to the inetaddr_validator_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
int register_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1398
/*
 * Remove @nb from the inetaddr_validator_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
            nb);
}
EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1405
1406 /* Rename ifa_labels for a device name change. Make some effort to preserve
1407  * existing alias numbering and to create unique labels if possible.
1408 */
1409 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1410 {
1411         struct in_ifaddr *ifa;
1412         int named = 0;
1413
1414         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1415                 char old[IFNAMSIZ], *dot;
1416
1417                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1418                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1419                 if (named++ == 0)
1420                         goto skip;
1421                 dot = strchr(old, ':');
1422                 if (!dot) {
1423                         sprintf(old, ":%d", named);
1424                         dot = old;
1425                 }
1426                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1427                         strcat(ifa->ifa_label, dot);
1428                 else
1429                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1430 skip:
1431                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1432         }
1433 }
1434
1435 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1436                                         struct in_device *in_dev)
1437
1438 {
1439         struct in_ifaddr *ifa;
1440
1441         for (ifa = in_dev->ifa_list; ifa;
1442              ifa = ifa->ifa_next) {
1443                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1444                          ifa->ifa_local, dev,
1445                          ifa->ifa_local, NULL,
1446                          dev->dev_addr, NULL);
1447         }
1448 }
1449
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback for IPv4.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER
 * (and on NETDEV_CHANGEMTU if the new MTU is valid); all other events are
 * ignored.  Otherwise the event is dispatched in the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on
 * NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (IS_ERR(in_dev))
                                return notifier_from_errno(PTR_ERR(in_dev));
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        /* NOTE(review): the inetdev_init() result is not
                         * checked on this path.
                         */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* An in_device already exists at registration time. */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                if (dev->flags & IFF_LOOPBACK) {
                        /* Give the loopback device 127.0.0.1/8, host scope.
                         * Allocation failure is silently tolerated.
                         */
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
                                                 INFINITY_LIFE_TIME);
                                ipv4_devconf_setall(in_dev);
                                neigh_parms_data_state_setall(in_dev->arp_parms);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce only if arp_notify is set. */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 */
                /* NOTE(review): inetdev_changename() as written in this
                 * file does call rtmsg_ifa(RTM_NEWADDR, ...) for every
                 * address, so the comment above looks stale - confirm.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register sysctls, presumably so that they appear
                 * under the new device name - confirm.
                 */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1544
/* Notifier block whose callback is inetdev_event(); where it is registered
 * is outside this part of the file.
 */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1548
1549 static size_t inet_nlmsg_size(void)
1550 {
1551         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1552                + nla_total_size(4) /* IFA_ADDRESS */
1553                + nla_total_size(4) /* IFA_LOCAL */
1554                + nla_total_size(4) /* IFA_BROADCAST */
1555                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1556                + nla_total_size(4)  /* IFA_FLAGS */
1557                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1558 }
1559
1560 static inline u32 cstamp_delta(unsigned long cstamp)
1561 {
1562         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1563 }
1564
1565 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1566                          unsigned long tstamp, u32 preferred, u32 valid)
1567 {
1568         struct ifa_cacheinfo ci;
1569
1570         ci.cstamp = cstamp_delta(cstamp);
1571         ci.tstamp = cstamp_delta(tstamp);
1572         ci.ifa_prefered = preferred;
1573         ci.ifa_valid = valid;
1574
1575         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1576 }
1577
1578 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1579                             u32 portid, u32 seq, int event, unsigned int flags)
1580 {
1581         struct ifaddrmsg *ifm;
1582         struct nlmsghdr  *nlh;
1583         u32 preferred, valid;
1584
1585         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1586         if (!nlh)
1587                 return -EMSGSIZE;
1588
1589         ifm = nlmsg_data(nlh);
1590         ifm->ifa_family = AF_INET;
1591         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1592         ifm->ifa_flags = ifa->ifa_flags;
1593         ifm->ifa_scope = ifa->ifa_scope;
1594         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1595
1596         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1597                 preferred = ifa->ifa_preferred_lft;
1598                 valid = ifa->ifa_valid_lft;
1599                 if (preferred != INFINITY_LIFE_TIME) {
1600                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1601
1602                         if (preferred > tval)
1603                                 preferred -= tval;
1604                         else
1605                                 preferred = 0;
1606                         if (valid != INFINITY_LIFE_TIME) {
1607                                 if (valid > tval)
1608                                         valid -= tval;
1609                                 else
1610                                         valid = 0;
1611                         }
1612                 }
1613         } else {
1614                 preferred = INFINITY_LIFE_TIME;
1615                 valid = INFINITY_LIFE_TIME;
1616         }
1617         if ((ifa->ifa_address &&
1618              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1619             (ifa->ifa_local &&
1620              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1621             (ifa->ifa_broadcast &&
1622              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1623             (ifa->ifa_label[0] &&
1624              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1625             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1626             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1627                           preferred, valid))
1628                 goto nla_put_failure;
1629
1630         nlmsg_end(skb, nlh);
1631         return 0;
1632
1633 nla_put_failure:
1634         nlmsg_cancel(skb, nlh);
1635         return -EMSGSIZE;
1636 }
1637
/*
 * Netlink dump callback: emits one RTM_NEWADDR message (flagged NLM_F_MULTI)
 * for each entry on the ifa_list of every device found in the namespace's
 * dev_index_head hash table.
 *
 * The position to resume from is carried between calls in cb->args:
 *   args[0] - hash bucket,
 *   args[1] - device index within that bucket,
 *   args[2] - address index within that device.
 *
 * Returns skb->len.
 */
1638 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1639 {
1640         struct net *net = sock_net(skb->sk);
1641         int h, s_h;
1642         int idx, s_idx;
1643         int ip_idx, s_ip_idx;
1644         struct net_device *dev;
1645         struct in_device *in_dev;
1646         struct in_ifaddr *ifa;
1647         struct hlist_head *head;
1648
             /* Pick up where the previous call stopped. */
1649         s_h = cb->args[0];
1650         s_idx = idx = cb->args[1];
1651         s_ip_idx = ip_idx = cb->args[2];
1652
             /* Only the first bucket visited honours s_idx; later ones start at 0. */
1653         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1654                 idx = 0;
1655                 head = &net->dev_index_head[h];
1656                 rcu_read_lock();
                     /* NOTE(review): presumably consumed by nl_dump_check_consistent() below - confirm. */
1657                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1658                           net->dev_base_seq;
1659                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                             /* Skip devices that come before the resume point. */
1660                         if (idx < s_idx)
1661                                 goto cont;
                             /* Past the device we stopped on: start from its first address. */
1662                         if (h > s_h || idx > s_idx)
1663                                 s_ip_idx = 0;
1664                         in_dev = __in_dev_get_rcu(dev);
1665                         if (!in_dev)
1666                                 goto cont;
1667
1668                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1669                              ifa = ifa->ifa_next, ip_idx++) {
1670                                 if (ip_idx < s_ip_idx)
1671                                         continue;
                                     /*
                                      * A negative return stops the dump here; h, idx
                                      * and ip_idx are saved at "done" as they stand.
                                      */
1672                                 if (inet_fill_ifaddr(skb, ifa,
1673                                              NETLINK_CB(cb->skb).portid,
1674                                              cb->nlh->nlmsg_seq,
1675                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1676                                         rcu_read_unlock();
1677                                         goto done;
1678                                 }
1679                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1680                         }
1681 cont:
1682                         idx++;
1683                 }
1684                 rcu_read_unlock();
1685         }
1686
1687 done:
             /* Record the current position for the next invocation. */
1688         cb->args[0] = h;
1689         cb->args[1] = idx;
1690         cb->args[2] = ip_idx;
1691
1692         return skb->len;
1693 }
1694
1695 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1696                       u32 portid)
1697 {
1698         struct sk_buff *skb;
1699         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1700         int err = -ENOBUFS;
1701         struct net *net;
1702
1703         net = dev_net(ifa->ifa_dev->dev);
1704         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1705         if (!skb)
1706                 goto errout;
1707
1708         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1709         if (err < 0) {
1710                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1711                 WARN_ON(err == -EMSGSIZE);
1712                 kfree_skb(skb);
1713                 goto errout;
1714         }
1715         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1716         return;
1717 errout:
1718         if (err < 0)
1719                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1720 }
1721
1722 static size_t inet_get_link_af_size(const struct net_device *dev,
1723                                     u32 ext_filter_mask)
1724 {
1725         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1726
1727         if (!in_dev)
1728                 return 0;
1729
1730         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1731 }
1732
1733 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1734                              u32 ext_filter_mask)
1735 {
1736         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1737         struct nlattr *nla;
1738         int i;
1739
1740         if (!in_dev)
1741                 return -ENODATA;
1742
1743         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1744         if (!nla)
1745                 return -EMSGSIZE;
1746
1747         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1748                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1749
1750         return 0;
1751 }
1752
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF is declared as a nested attribute.
 */
1753 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1754         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1755 };
1756
1757 static int inet_validate_link_af(const struct net_device *dev,
1758                                  const struct nlattr *nla)
1759 {
1760         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1761         int err, rem;
1762
1763         if (dev && !__in_dev_get_rtnl(dev))
1764                 return -EAFNOSUPPORT;
1765
1766         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1767         if (err < 0)
1768                 return err;
1769
1770         if (tb[IFLA_INET_CONF]) {
1771                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1772                         int cfgid = nla_type(a);
1773
1774                         if (nla_len(a) < 4)
1775                                 return -EINVAL;
1776
1777                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1778                                 return -EINVAL;
1779                 }
1780         }
1781
1782         return 0;
1783 }
1784
1785 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1786 {
1787         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1788         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1789         int rem;
1790
1791         if (!in_dev)
1792                 return -EAFNOSUPPORT;
1793
1794         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1795                 BUG();
1796
1797         if (tb[IFLA_INET_CONF]) {
1798                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1799                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1800         }
1801
1802         return 0;
1803 }
1804
1805 static int inet_netconf_msgsize_devconf(int type)
1806 {
1807         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1808                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1809         bool all = false;
1810
1811         if (type == NETCONFA_ALL)
1812                 all = true;
1813
1814         if (all || type == NETCONFA_FORWARDING)
1815                 size += nla_total_size(4);
1816         if (all || type == NETCONFA_RP_FILTER)
1817                 size += nla_total_size(4);
1818         if (all || type == NETCONFA_MC_FORWARDING)
1819                 size += nla_total_size(4);
1820         if (all || type == NETCONFA_PROXY_NEIGH)
1821                 size += nla_total_size(4);
1822         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1823                 size += nla_total_size(4);
1824
1825         return size;
1826 }
1827
1828 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1829                                      struct ipv4_devconf *devconf, u32 portid,
1830                                      u32 seq, int event, unsigned int flags,
1831                                      int type)
1832 {
1833         struct nlmsghdr  *nlh;
1834         struct netconfmsg *ncm;
1835         bool all = false;
1836
1837         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1838                         flags);
1839         if (!nlh)
1840                 return -EMSGSIZE;
1841
1842         if (type == NETCONFA_ALL)
1843                 all = true;
1844
1845         ncm = nlmsg_data(nlh);
1846         ncm->ncm_family = AF_INET;
1847
1848         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1849                 goto nla_put_failure;
1850
1851         if (!devconf)
1852                 goto out;
1853
1854         if ((all || type == NETCONFA_FORWARDING) &&
1855             nla_put_s32(skb, NETCONFA_FORWARDING,
1856                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1857                 goto nla_put_failure;
1858         if ((all || type == NETCONFA_RP_FILTER) &&
1859             nla_put_s32(skb, NETCONFA_RP_FILTER,
1860                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1861                 goto nla_put_failure;
1862         if ((all || type == NETCONFA_MC_FORWARDING) &&
1863             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1864                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1865                 goto nla_put_failure;
1866         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1867             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1868                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1869                 goto nla_put_failure;
1870         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1871             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1872                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1873                 goto nla_put_failure;
1874
1875 out:
1876         nlmsg_end(skb, nlh);
1877         return 0;
1878
1879 nla_put_failure:
1880         nlmsg_cancel(skb, nlh);
1881         return -EMSGSIZE;
1882 }
1883
1884 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1885                                  int ifindex, struct ipv4_devconf *devconf)
1886 {
1887         struct sk_buff *skb;
1888         int err = -ENOBUFS;
1889
1890         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1891         if (!skb)
1892                 goto errout;
1893
1894         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1895                                         event, 0, type);
1896         if (err < 0) {
1897                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1898                 WARN_ON(err == -EMSGSIZE);
1899                 kfree_skb(skb);
1900                 goto errout;
1901         }
1902         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1903         return;
1904 errout:
1905         if (err < 0)
1906                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1907 }
1908
/*
 * Attribute policy handed to nlmsg_parse() by inet_netconf_get_devconf().
 * Each listed attribute declares .len = sizeof(int).
 * NOTE(review): no .type is given, so how .len is enforced depends on the
 * default type handling of struct nla_policy - confirm.
 */
1909 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1910         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1911         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1912         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1913         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1914         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1915 };
1916
1917 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1918                                     struct nlmsghdr *nlh,
1919                                     struct netlink_ext_ack *extack)
1920 {
1921         struct net *net = sock_net(in_skb->sk);
1922         struct nlattr *tb[NETCONFA_MAX+1];
1923         struct netconfmsg *ncm;
1924         struct sk_buff *skb;
1925         struct ipv4_devconf *devconf;
1926         struct in_device *in_dev;
1927         struct net_device *dev;
1928         int ifindex;
1929         int err;
1930
1931         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1932                           devconf_ipv4_policy, extack);
1933         if (err < 0)
1934                 goto errout;
1935
1936         err = -EINVAL;
1937         if (!tb[NETCONFA_IFINDEX])
1938                 goto errout;
1939
1940         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1941         switch (ifindex) {
1942         case NETCONFA_IFINDEX_ALL:
1943                 devconf = net->ipv4.devconf_all;
1944                 break;
1945         case NETCONFA_IFINDEX_DEFAULT:
1946                 devconf = net->ipv4.devconf_dflt;
1947                 break;
1948         default:
1949                 dev = __dev_get_by_index(net, ifindex);
1950                 if (!dev)
1951                         goto errout;
1952                 in_dev = __in_dev_get_rtnl(dev);
1953                 if (!in_dev)
1954                         goto errout;
1955                 devconf = &in_dev->cnf;
1956                 break;
1957         }
1958
1959         err = -ENOBUFS;
1960         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1961         if (!skb)
1962                 goto errout;
1963
1964         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1965                                         NETLINK_CB(in_skb).portid,
1966                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1967                                         NETCONFA_ALL);
1968         if (err < 0) {
1969                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1970                 WARN_ON(err == -EMSGSIZE);
1971                 kfree_skb(skb);
1972                 goto errout;
1973         }
1974         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1975 errout:
1976         return err;
1977 }
1978
/*
 * Netlink dump callback: emits one RTM_NEWNETCONF message (NETCONFA_ALL,
 * flagged NLM_F_MULTI) for every device in the namespace's dev_index_head
 * hash table that has an in_device, followed by one message for
 * devconf_all and one for devconf_dflt.
 *
 * The position to resume from is carried between calls in cb->args:
 *   args[0] - hash bucket; the values NETDEV_HASHENTRIES and
 *             NETDEV_HASHENTRIES + 1 stand for the "all" and "default"
 *             entries respectively,
 *   args[1] - device index within the bucket.
 *
 * Returns skb->len.
 */
1979 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1980                                      struct netlink_callback *cb)
1981 {
1982         struct net *net = sock_net(skb->sk);
1983         int h, s_h;
1984         int idx, s_idx;
1985         struct net_device *dev;
1986         struct in_device *in_dev;
1987         struct hlist_head *head;
1988
             /* Pick up where the previous call stopped. */
1989         s_h = cb->args[0];
1990         s_idx = idx = cb->args[1];
1991
             /* Only the first bucket visited honours s_idx; later ones start at 0. */
1992         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1993                 idx = 0;
1994                 head = &net->dev_index_head[h];
1995                 rcu_read_lock();
                     /* NOTE(review): presumably consumed by nl_dump_check_consistent() below - confirm. */
1996                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1997                           net->dev_base_seq;
1998                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                             /* Skip devices that come before the resume point. */
1999                         if (idx < s_idx)
2000                                 goto cont;
2001                         in_dev = __in_dev_get_rcu(dev);
2002                         if (!in_dev)
2003                                 goto cont;
2004
                             /* A negative return stops the dump; h and idx are saved at "done". */
2005                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2006                                                       &in_dev->cnf,
2007                                                       NETLINK_CB(cb->skb).portid,
2008                                                       cb->nlh->nlmsg_seq,
2009                                                       RTM_NEWNETCONF,
2010                                                       NLM_F_MULTI,
2011                                                       NETCONFA_ALL) < 0) {
2012                                 rcu_read_unlock();
2013                                 goto done;
2014                         }
2015                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2016 cont:
2017                         idx++;
2018                 }
2019                 rcu_read_unlock();
2020         }
             /* All buckets done: emit the "all" entry, advancing h only on success. */
2021         if (h == NETDEV_HASHENTRIES) {
2022                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2023                                               net->ipv4.devconf_all,
2024                                               NETLINK_CB(cb->skb).portid,
2025                                               cb->nlh->nlmsg_seq,
2026                                               RTM_NEWNETCONF, NLM_F_MULTI,
2027                                               NETCONFA_ALL) < 0)
2028                         goto done;
2029                 else
2030                         h++;
2031         }
             /* Then the "default" entry, again advancing h only on success. */
2032         if (h == NETDEV_HASHENTRIES + 1) {
2033                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2034                                               net->ipv4.devconf_dflt,
2035                                               NETLINK_CB(cb->skb).portid,
2036                                               cb->nlh->nlmsg_seq,
2037                                               RTM_NEWNETCONF, NLM_F_MULTI,
2038                                               NETCONFA_ALL) < 0)
2039                         goto done;
2040                 else
2041                         h++;
2042         }
2043 done:
             /* Record the current position for the next invocation. */
2044         cb->args[0] = h;
2045         cb->args[1] = idx;
2046
2047         return skb->len;
2048 }
2049
2050 #ifdef CONFIG_SYSCTL
2051
2052 static void devinet_copy_dflt_conf(struct net *net, int i)
2053 {
2054         struct net_device *dev;
2055
2056         rcu_read_lock();
2057         for_each_netdev_rcu(net, dev) {
2058                 struct in_device *in_dev;
2059
2060                 in_dev = __in_dev_get_rcu(dev);
2061                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2062                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2063         }
2064         rcu_read_unlock();
2065 }
2066
2067 /* called with RTNL locked */
2068 static void inet_forward_change(struct net *net)
2069 {
2070         struct net_device *dev;
2071         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2072
2073         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2074         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2075         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2076                                     NETCONFA_FORWARDING,
2077                                     NETCONFA_IFINDEX_ALL,
2078                                     net->ipv4.devconf_all);
2079         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2080                                     NETCONFA_FORWARDING,
2081                                     NETCONFA_IFINDEX_DEFAULT,
2082                                     net->ipv4.devconf_dflt);
2083
2084         for_each_netdev(net, dev) {
2085                 struct in_device *in_dev;
2086
2087                 if (on)
2088                         dev_disable_lro(dev);
2089
2090                 in_dev = __in_dev_get_rtnl(dev);
2091                 if (in_dev) {
2092                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2093                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2094                                                     NETCONFA_FORWARDING,
2095                                                     dev->ifindex, &in_dev->cnf);
2096                 }
2097         }
2098 }
2099
2100 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2101 {
2102         if (cnf == net->ipv4.devconf_dflt)
2103                 return NETCONFA_IFINDEX_DEFAULT;
2104         else if (cnf == net->ipv4.devconf_all)
2105                 return NETCONFA_IFINDEX_ALL;
2106         else {
2107                 struct in_device *idev
2108                         = container_of(cnf, struct in_device, cnf);
2109                 return idev->dev->ifindex;
2110         }
2111 }
2112
2113 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2114                              void __user *buffer,
2115                              size_t *lenp, loff_t *ppos)
2116 {
2117         int old_value = *(int *)ctl->data;
2118         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2119         int new_value = *(int *)ctl->data;
2120
2121         if (write) {
2122                 struct ipv4_devconf *cnf = ctl->extra1;
2123                 struct net *net = ctl->extra2;
2124                 int i = (int *)ctl->data - cnf->data;
2125                 int ifindex;
2126
2127                 set_bit(i, cnf->state);
2128
2129                 if (cnf == net->ipv4.devconf_dflt)
2130                         devinet_copy_dflt_conf(net, i);
2131                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2132                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2133                         if ((new_value == 0) && (old_value != 0))
2134                                 rt_cache_flush(net);
2135
2136                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2137                     new_value != old_value) {
2138                         ifindex = devinet_conf_ifindex(net, cnf);
2139                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2140                                                     NETCONFA_RP_FILTER,
2141                                                     ifindex, cnf);
2142                 }
2143                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2144                     new_value != old_value) {
2145                         ifindex = devinet_conf_ifindex(net, cnf);
2146                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2147                                                     NETCONFA_PROXY_NEIGH,
2148                                                     ifindex, cnf);
2149                 }
2150                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2151                     new_value != old_value) {
2152                         ifindex = devinet_conf_ifindex(net, cnf);
2153                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2154                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2155                                                     ifindex, cnf);
2156                 }
2157         }
2158
2159         return ret;
2160 }
2161
/*
 * sysctl handler for the "forwarding" / "ip_forward" entries.
 *
 * The value is read or written with proc_dointvec().  When a write changed
 * the value:
 *   - for the namespace default entry, only a netconf notification is sent;
 *   - otherwise RTNL is taken with rtnl_trylock().  The "all" entry is
 *     propagated with inet_forward_change(); a per-device entry disables
 *     LRO on the device when the new value is non-zero and sends a netconf
 *     notification.  The route cache is flushed after RTNL is released.
 *
 * Returns the result of proc_dointvec(), or the result of restart_syscall()
 * when RTNL could not be taken.
 */
2162 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2163                                   void __user *buffer,
2164                                   size_t *lenp, loff_t *ppos)
2165 {
2166         int *valp = ctl->data;
             /* Value and file position before the write, kept for comparison and rollback. */
2167         int val = *valp;
2168         loff_t pos = *ppos;
2169         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2170
2171         if (write && *valp != val) {
2172                 struct net *net = ctl->extra2;
2173
2174                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2175                         if (!rtnl_trylock()) {
2176                                 /* Restore the original values before restarting */
2177                                 *valp = val;
2178                                 *ppos = pos;
2179                                 return restart_syscall();
2180                         }
2181                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2182                                 inet_forward_change(net);
2183                         } else {
                                     /* Per-device entry: cnf is embedded in that device's in_device. */
2184                                 struct ipv4_devconf *cnf = ctl->extra1;
2185                                 struct in_device *idev =
2186                                         container_of(cnf, struct in_device, cnf);
2187                                 if (*valp)
2188                                         dev_disable_lro(idev->dev);
2189                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2190                                                             NETCONFA_FORWARDING,
2191                                                             idev->dev->ifindex,
2192                                                             cnf);
2193                         }
2194                         rtnl_unlock();
2195                         rt_cache_flush(net);
2196                 } else
2197                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2198                                                     NETCONFA_FORWARDING,
2199                                                     NETCONFA_IFINDEX_DEFAULT,
2200                                                     net->ipv4.devconf_dflt);
2201         }
2202
2203         return ret;
2204 }
2205
2206 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2207                                 void __user *buffer,
2208                                 size_t *lenp, loff_t *ppos)
2209 {
2210         int *valp = ctl->data;
2211         int val = *valp;
2212         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2213         struct net *net = ctl->extra2;
2214
2215         if (write && *valp != val)
2216                 rt_cache_flush(net);
2217
2218         return ret;
2219 }
2220
/*
 * Initializer for one ctl_table entry of the template table below.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases
 * both onto the configuration block actually being registered.
 */
2221 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2222         { \
2223                 .procname       = name, \
2224                 .data           = ipv4_devconf.data + \
2225                                   IPV4_DEVCONF_ ## attr - 1, \
2226                 .maxlen         = sizeof(int), \
2227                 .mode           = mval, \
2228                 .proc_handler   = proc, \
2229                 .extra1         = &ipv4_devconf, \
2230         }
2231
/* Mode 0644 entry handled by devinet_conf_proc(). */
2232 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2233         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2234
/* Mode 0444 entry handled by devinet_conf_proc(). */
2235 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2236         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2237
/* Mode 0644 entry with a caller-supplied handler. */
2238 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2239         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2240
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
2241 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2242         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2243
/*
 * Template sysctl table for one IPv4 configuration block.
 * __devinet_sysctl_register() duplicates it with kmemdup(), repoints each
 * entry at the target configuration and registers the copy under
 * "net/ipv4/conf/<name>"; sysctl_header holds the registration handle.
 *
 * devinet_vars has __IPV4_DEVCONF_MAX slots.
 * NOTE(review): the register loop skips the last slot, presumably the
 * all-zero terminator of the table - confirm.
 */
2244 static struct devinet_sysctl_table {
2245         struct ctl_table_header *sysctl_header;
2246         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2247 } devinet_sysctl = {
2248         .devinet_vars = {
                     /* "forwarding" has its own handler; "mc_forwarding" is mode 0444. */
2249                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2250                                              devinet_sysctl_forward),
2251                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2252
                     /* Entries handled by devinet_conf_proc(). */
2253                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2254                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2255                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2256                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2257                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2258                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2259                                         "accept_source_route"),
2260                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2261                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2262                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2263                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2264                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2265                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2266                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2267                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2268                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2269                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2270                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2271                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2272                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2273                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2274                                         "force_igmp_version"),
2275                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2276                                         "igmpv2_unsolicited_report_interval"),
2277                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2278                                         "igmpv3_unsolicited_report_interval"),
2279                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2280                                         "ignore_routes_with_linkdown"),
2281                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2282                                         "drop_gratuitous_arp"),
2283
                     /* Entries handled by ipv4_doint_and_flush(). */
2284                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2285                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2286                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2287                                               "promote_secondaries"),
2288                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2289                                               "route_localnet"),
2290                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2291                                               "drop_unicast_in_l2_multicast"),
2292         },
2293 };
2294
2295 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2296                                      int ifindex, struct ipv4_devconf *p)
2297 {
2298         int i;
2299         struct devinet_sysctl_table *t;
2300         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2301
2302         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2303         if (!t)
2304                 goto out;
2305
2306         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2307                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2308                 t->devinet_vars[i].extra1 = p;
2309                 t->devinet_vars[i].extra2 = net;
2310         }
2311
2312         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2313
2314         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2315         if (!t->sysctl_header)
2316                 goto free;
2317
2318         p->sysctl = t;
2319
2320         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2321                                     ifindex, p);
2322         return 0;
2323
2324 free:
2325         kfree(t);
2326 out:
2327         return -ENOMEM;
2328 }
2329
2330 static void __devinet_sysctl_unregister(struct net *net,
2331                                         struct ipv4_devconf *cnf, int ifindex)
2332 {
2333         struct devinet_sysctl_table *t = cnf->sysctl;
2334
2335         if (t) {
2336                 cnf->sysctl = NULL;
2337                 unregister_net_sysctl_table(t->sysctl_header);
2338                 kfree(t);
2339         }
2340
2341         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2342 }
2343
2344 static int devinet_sysctl_register(struct in_device *idev)
2345 {
2346         int err;
2347
2348         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2349                 return -EINVAL;
2350
2351         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2352         if (err)
2353                 return err;
2354         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2355                                         idev->dev->ifindex, &idev->cnf);
2356         if (err)
2357                 neigh_sysctl_unregister(idev->arp_parms);
2358         return err;
2359 }
2360
2361 static void devinet_sysctl_unregister(struct in_device *idev)
2362 {
2363         struct net *net = dev_net(idev->dev);
2364
2365         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2366         neigh_sysctl_unregister(idev->arp_parms);
2367 }
2368
/*
 * Table for the "ip_forward" sysctl, registered under "net/ipv4" by
 * devinet_init_net().  As written it refers to the FORWARDING slot of the
 * global ipv4_devconf and to init_net; for other namespaces
 * devinet_init_net() duplicates the table and repoints .data, .extra1 and
 * .extra2 at that namespace's own "all" configuration.
 */
2369 static struct ctl_table ctl_forward_entry[] = {
2370         {
2371                 .procname       = "ip_forward",
2372                 .data           = &ipv4_devconf.data[
2373                                         IPV4_DEVCONF_FORWARDING - 1],
2374                 .maxlen         = sizeof(int),
2375                 .mode           = 0644,
2376                 .proc_handler   = devinet_sysctl_forward,
2377                 .extra1         = &ipv4_devconf,
2378                 .extra2         = &init_net,
2379         },
2380         { },
2381 };
2382 #endif
2383
2384 static __net_init int devinet_init_net(struct net *net)
2385 {
2386         int err;
2387         struct ipv4_devconf *all, *dflt;
2388 #ifdef CONFIG_SYSCTL
2389         struct ctl_table *tbl = ctl_forward_entry;
2390         struct ctl_table_header *forw_hdr;
2391 #endif
2392
2393         err = -ENOMEM;
2394         all = &ipv4_devconf;
2395         dflt = &ipv4_devconf_dflt;
2396
2397         if (!net_eq(net, &init_net)) {
2398                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2399                 if (!all)
2400                         goto err_alloc_all;
2401
2402                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2403                 if (!dflt)
2404                         goto err_alloc_dflt;
2405
2406 #ifdef CONFIG_SYSCTL
2407                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2408                 if (!tbl)
2409                         goto err_alloc_ctl;
2410
2411                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2412                 tbl[0].extra1 = all;
2413                 tbl[0].extra2 = net;
2414 #endif
2415         }
2416
2417 #ifdef CONFIG_SYSCTL
2418         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2419         if (err < 0)
2420                 goto err_reg_all;
2421
2422         err = __devinet_sysctl_register(net, "default",
2423                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2424         if (err < 0)
2425                 goto err_reg_dflt;
2426
2427         err = -ENOMEM;
2428         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2429         if (!forw_hdr)
2430                 goto err_reg_ctl;
2431         net->ipv4.forw_hdr = forw_hdr;
2432 #endif
2433
2434         net->ipv4.devconf_all = all;
2435         net->ipv4.devconf_dflt = dflt;
2436         return 0;
2437
2438 #ifdef CONFIG_SYSCTL
2439 err_reg_ctl:
2440         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2441 err_reg_dflt:
2442         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2443 err_reg_all:
2444         if (tbl != ctl_forward_entry)
2445                 kfree(tbl);
2446 err_alloc_ctl:
2447 #endif
2448         if (dflt != &ipv4_devconf_dflt)
2449                 kfree(dflt);
2450 err_alloc_dflt:
2451         if (all != &ipv4_devconf)
2452                 kfree(all);
2453 err_alloc_all:
2454         return err;
2455 }
2456
2457 static __net_exit void devinet_exit_net(struct net *net)
2458 {
2459 #ifdef CONFIG_SYSCTL
2460         struct ctl_table *tbl;
2461
2462         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2463         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2464         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2465                                     NETCONFA_IFINDEX_DEFAULT);
2466         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2467                                     NETCONFA_IFINDEX_ALL);
2468         kfree(tbl);
2469 #endif
2470         kfree(net->ipv4.devconf_dflt);
2471         kfree(net->ipv4.devconf_all);
2472 }
2473
2474 static __net_initdata struct pernet_operations devinet_ops = {
2475         .init = devinet_init_net,
2476         .exit = devinet_exit_net,
2477 };
2478
2479 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2480         .family           = AF_INET,
2481         .fill_link_af     = inet_fill_link_af,
2482         .get_link_af_size = inet_get_link_af_size,
2483         .validate_link_af = inet_validate_link_af,
2484         .set_link_af      = inet_set_link_af,
2485 };
2486
2487 void __init devinet_init(void)
2488 {
2489         int i;
2490
2491         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2492                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2493
2494         register_pernet_subsys(&devinet_ops);
2495
2496         register_gifconf(PF_INET, inet_gifconf);
2497         register_netdevice_notifier(&ip_netdev_notifier);
2498
2499         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2500
2501         rtnl_af_register(&inet_af_ops);
2502
2503         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2504         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2505         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2506         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2507                       inet_netconf_dump_devconf, 0);
2508 }