GNU Linux-libre 4.19.211-gnu1
[releases.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/* Address flags that __inet_insert_ifa() strips from every IPv4 address
 * before linking it ("Don't set IPv6 only flags to IPv4 addresses").
 */
#define IPV6ONLY_FLAGS  \
                (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
                 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
                 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
73
/* Built-in IPv4 device configuration values.  The data[] array is indexed
 * by (IPV4_DEVCONF_<name> - 1); entries not listed here are zero.
 * NOTE(review): presumably backs the "all" configuration of the initial
 * namespace - the users are outside this part of the file, confirm there.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
84
/* Built-in default IPv4 device configuration values, indexed like
 * ipv4_devconf.  The only difference from ipv4_devconf is that
 * ACCEPT_SOURCE_ROUTE is enabled here.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device - confirm at the
 * namespace init code, which is not visible in this part of the file.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
96
/* Access attribute @attr of the per-namespace default devconf of @net. */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
99
/* Netlink attribute policy for IPv4 address messages; handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr().
 * The label is limited to IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
        [IFA_FLAGS]             = { .type = NLA_U32 },
        [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
};
109
/* Hash table of all configured IPv4 addresses: 2^8 = 256 buckets, keyed by
 * inet_addr_hash() and chained through in_ifaddr::hash.
 */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
114
115 static u32 inet_addr_hash(const struct net *net, __be32 addr)
116 {
117         u32 val = (__force u32) addr ^ net_hash_mix(net);
118
119         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
120 }
121
122 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
123 {
124         u32 hash = inet_addr_hash(net, ifa->ifa_local);
125
126         ASSERT_RTNL();
127         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
128 }
129
/* Unlink @ifa from the global address hash.  Must be called with RTNL
 * held.
 */
static void inet_hash_remove(struct in_ifaddr *ifa)
{
        ASSERT_RTNL();
        hlist_del_init_rcu(&ifa->hash);
}
135
136 /**
137  * __ip_dev_find - find the first device with a given source address.
138  * @net: the net namespace
139  * @addr: the source address
140  * @devref: if true, take a reference on the found device
141  *
142  * If a caller uses devref=false, it should be protected by RCU, or RTNL
143  */
144 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
145 {
146         struct net_device *result = NULL;
147         struct in_ifaddr *ifa;
148
149         rcu_read_lock();
150         ifa = inet_lookup_ifaddr_rcu(net, addr);
151         if (!ifa) {
152                 struct flowi4 fl4 = { .daddr = addr };
153                 struct fib_result res = { 0 };
154                 struct fib_table *local;
155
156                 /* Fallback to FIB local table so that communication
157                  * over loopback subnets work.
158                  */
159                 local = fib_get_table(net, RT_TABLE_LOCAL);
160                 if (local &&
161                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
162                     res.type == RTN_LOCAL)
163                         result = FIB_RES_DEV(res);
164         } else {
165                 result = ifa->ifa_dev->dev;
166         }
167         if (result && devref)
168                 dev_hold(result);
169         rcu_read_unlock();
170         return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173
174 /* called under RCU lock */
175 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
176 {
177         u32 hash = inet_addr_hash(net, addr);
178         struct in_ifaddr *ifa;
179
180         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
181                 if (ifa->ifa_local == addr &&
182                     net_eq(dev_net(ifa->ifa_dev->dev), net))
183                         return ifa;
184
185         return NULL;
186 }
187
/* Forward declaration: rtmsg_ifa() is used before its definition. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* inetaddr_chain is called with NETDEV_UP/NETDEV_DOWN when addresses are
 * added or removed; inetaddr_validator_chain is called by
 * __inet_insert_ifa() before an address is committed.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL, registration trivially succeeds and
 * unregistration does nothing.
 */
static int devinet_sysctl_register(struct in_device *idev)
{
        return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
206
/* Allocate a zero-initialised in_ifaddr.  May sleep (GFP_KERNEL).
 * Returns NULL on allocation failure.
 */

static struct in_ifaddr *inet_alloc_ifa(void)
{
        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
}
213
214 static void inet_rcu_free_ifa(struct rcu_head *head)
215 {
216         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
217         if (ifa->ifa_dev)
218                 in_dev_put(ifa->ifa_dev);
219         kfree(ifa);
220 }
221
/* Schedule @ifa to be released by inet_rcu_free_ifa() via call_rcu(). */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
226
227 void in_dev_finish_destroy(struct in_device *idev)
228 {
229         struct net_device *dev = idev->dev;
230
231         WARN_ON(idev->ifa_list);
232         WARN_ON(idev->mc_list);
233         kfree(rcu_dereference_protected(idev->mc_hash, 1));
234 #ifdef NET_REFCNT_DEBUG
235         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
236 #endif
237         dev_put(dev);
238         if (!idev->dead)
239                 pr_err("Freeing alive in_device %p\n", idev);
240         else
241                 kfree(idev);
242 }
243 EXPORT_SYMBOL(in_dev_finish_destroy);
244
245 static struct in_device *inetdev_init(struct net_device *dev)
246 {
247         struct in_device *in_dev;
248         int err = -ENOMEM;
249
250         ASSERT_RTNL();
251
252         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
253         if (!in_dev)
254                 goto out;
255         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
256                         sizeof(in_dev->cnf));
257         in_dev->cnf.sysctl = NULL;
258         in_dev->dev = dev;
259         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
260         if (!in_dev->arp_parms)
261                 goto out_kfree;
262         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
263                 dev_disable_lro(dev);
264         /* Reference in_dev->dev */
265         dev_hold(dev);
266         /* Account for reference dev->ip_ptr (below) */
267         refcount_set(&in_dev->refcnt, 1);
268
269         err = devinet_sysctl_register(in_dev);
270         if (err) {
271                 in_dev->dead = 1;
272                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
273                 in_dev_put(in_dev);
274                 in_dev = NULL;
275                 goto out;
276         }
277         ip_mc_init_dev(in_dev);
278         if (dev->flags & IFF_UP)
279                 ip_mc_up(in_dev);
280
281         /* we can receive as soon as ip_ptr is set -- do this last */
282         rcu_assign_pointer(dev->ip_ptr, in_dev);
283 out:
284         return in_dev ?: ERR_PTR(err);
285 out_kfree:
286         kfree(in_dev);
287         in_dev = NULL;
288         goto out;
289 }
290
/* RCU callback queued by inetdev_destroy(): drop one reference on the
 * in_device that embeds @head.
 */
static void in_dev_rcu_put(struct rcu_head *head)
{
        struct in_device *idev = container_of(head, struct in_device, rcu_head);
        in_dev_put(idev);
}
296
297 static void inetdev_destroy(struct in_device *in_dev)
298 {
299         struct in_ifaddr *ifa;
300         struct net_device *dev;
301
302         ASSERT_RTNL();
303
304         dev = in_dev->dev;
305
306         in_dev->dead = 1;
307
308         ip_mc_destroy_dev(in_dev);
309
310         while ((ifa = in_dev->ifa_list) != NULL) {
311                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
312                 inet_free_ifa(ifa);
313         }
314
315         RCU_INIT_POINTER(dev->ip_ptr, NULL);
316
317         devinet_sysctl_unregister(in_dev);
318         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
319         arp_ifdown(dev);
320
321         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
322 }
323
324 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
325 {
326         rcu_read_lock();
327         for_primary_ifa(in_dev) {
328                 if (inet_ifa_match(a, ifa)) {
329                         if (!b || inet_ifa_match(b, ifa)) {
330                                 rcu_read_unlock();
331                                 return 1;
332                         }
333                 }
334         } endfor_ifa(in_dev);
335         rcu_read_unlock();
336         return 0;
337 }
338
339 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
340                          int destroy, struct nlmsghdr *nlh, u32 portid)
341 {
342         struct in_ifaddr *promote = NULL;
343         struct in_ifaddr *ifa, *ifa1 = *ifap;
344         struct in_ifaddr *last_prim = in_dev->ifa_list;
345         struct in_ifaddr *prev_prom = NULL;
346         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
347
348         ASSERT_RTNL();
349
350         if (in_dev->dead)
351                 goto no_promotions;
352
353         /* 1. Deleting primary ifaddr forces deletion all secondaries
354          * unless alias promotion is set
355          **/
356
357         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
358                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
359
360                 while ((ifa = *ifap1) != NULL) {
361                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
362                             ifa1->ifa_scope <= ifa->ifa_scope)
363                                 last_prim = ifa;
364
365                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
366                             ifa1->ifa_mask != ifa->ifa_mask ||
367                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
368                                 ifap1 = &ifa->ifa_next;
369                                 prev_prom = ifa;
370                                 continue;
371                         }
372
373                         if (!do_promote) {
374                                 inet_hash_remove(ifa);
375                                 *ifap1 = ifa->ifa_next;
376
377                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
378                                 blocking_notifier_call_chain(&inetaddr_chain,
379                                                 NETDEV_DOWN, ifa);
380                                 inet_free_ifa(ifa);
381                         } else {
382                                 promote = ifa;
383                                 break;
384                         }
385                 }
386         }
387
388         /* On promotion all secondaries from subnet are changing
389          * the primary IP, we must remove all their routes silently
390          * and later to add them back with new prefsrc. Do this
391          * while all addresses are on the device list.
392          */
393         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
394                 if (ifa1->ifa_mask == ifa->ifa_mask &&
395                     inet_ifa_match(ifa1->ifa_address, ifa))
396                         fib_del_ifaddr(ifa, ifa1);
397         }
398
399 no_promotions:
400         /* 2. Unlink it */
401
402         *ifap = ifa1->ifa_next;
403         inet_hash_remove(ifa1);
404
405         /* 3. Announce address deletion */
406
407         /* Send message first, then call notifier.
408            At first sight, FIB update triggered by notifier
409            will refer to already deleted ifaddr, that could confuse
410            netlink listeners. It is not true: look, gated sees
411            that route deleted and if it still thinks that ifaddr
412            is valid, it will try to restore deleted routes... Grr.
413            So that, this order is correct.
414          */
415         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
416         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
417
418         if (promote) {
419                 struct in_ifaddr *next_sec = promote->ifa_next;
420
421                 if (prev_prom) {
422                         prev_prom->ifa_next = promote->ifa_next;
423                         promote->ifa_next = last_prim->ifa_next;
424                         last_prim->ifa_next = promote;
425                 }
426
427                 promote->ifa_flags &= ~IFA_F_SECONDARY;
428                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
429                 blocking_notifier_call_chain(&inetaddr_chain,
430                                 NETDEV_UP, promote);
431                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
432                         if (ifa1->ifa_mask != ifa->ifa_mask ||
433                             !inet_ifa_match(ifa1->ifa_address, ifa))
434                                         continue;
435                         fib_add_ifaddr(ifa);
436                 }
437
438         }
439         if (destroy)
440                 inet_free_ifa(ifa1);
441 }
442
/* Delete the address at *@ifap without an originating netlink request
 * (no nlmsghdr, port id 0).  See __inet_del_ifa().
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
448
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
452
/* Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @ifa:    address to insert; on every failure path (and when ifa_local is
 *          zero) it is released with inet_free_ifa()
 * @nlh:    netlink request that triggered the insertion, or NULL
 * @portid: netlink port id of the requester
 * @extack: extended ack handed to the address validators
 *
 * Must be called with RTNL held.
 *
 * Returns 0 on success (also when ifa_local is zero and nothing is
 * inserted), -EEXIST if the same local address already exists in the same
 * subnet, -EINVAL if the subnet exists with a different scope, or the
 * error reported by a validator.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
                             u32 portid, struct netlink_ext_ack *extack)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct in_ifaddr *ifa1, **ifap, **last_primary;
        struct in_validator_info ivi;
        int ret;

        ASSERT_RTNL();

        if (!ifa->ifa_local) {
                inet_free_ifa(ifa);
                return 0;
        }

        /* Secondary status is derived below, not taken from the caller. */
        ifa->ifa_flags &= ~IFA_F_SECONDARY;
        last_primary = &in_dev->ifa_list;

        /* Don't set IPv6 only flags to IPv4 addresses */
        ifa->ifa_flags &= ~IPV6ONLY_FLAGS;

        /* Walk the whole list: remember the slot after the last primary
         * whose scope is not narrower than ours, and detect an existing
         * address in the same subnet (which makes the new one secondary).
         * When the loop ends, ifap is the tail slot of the list.
         */
        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
             ifap = &ifa1->ifa_next) {
                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
                    ifa->ifa_scope <= ifa1->ifa_scope)
                        last_primary = &ifa1->ifa_next;
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa)) {
                        if (ifa1->ifa_local == ifa->ifa_local) {
                                inet_free_ifa(ifa);
                                return -EEXIST;
                        }
                        if (ifa1->ifa_scope != ifa->ifa_scope) {
                                inet_free_ifa(ifa);
                                return -EINVAL;
                        }
                        ifa->ifa_flags |= IFA_F_SECONDARY;
                }
        }

        /* Allow any devices that wish to register ifaddr validators to weigh
         * in now, before changes are committed.  The rtnl lock is serializing
         * access here, so the state should not change between a validator call
         * and a final notify on commit.  This isn't invoked on promotion under
         * the assumption that validators are checking the address itself, and
         * not the flags.
         */
        ivi.ivi_addr = ifa->ifa_address;
        ivi.ivi_dev = ifa->ifa_dev;
        ivi.extack = extack;
        ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
                                           NETDEV_UP, &ivi);
        ret = notifier_to_errno(ret);
        if (ret) {
                inet_free_ifa(ifa);
                return ret;
        }

        /* Primaries go after the last suitable primary; secondaries keep
         * ifap from the loop above, i.e. they are appended at the tail.
         */
        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
                prandom_seed((__force u32) ifa->ifa_local);
                ifap = last_primary;
        }

        ifa->ifa_next = *ifap;
        *ifap = ifa;

        inet_hash_insert(dev_net(in_dev->dev), ifa);

        /* Re-run the lifetime check immediately (delay 0). */
        cancel_delayed_work(&check_lifetime_work);
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

        /* Send message first, then call notifier.
           Notifier will trigger FIB update, so that
           listeners of netlink will know about new ifaddr */
        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

        return 0;
}
532
/* Insert @ifa without an originating netlink request or extack.
 * See __inet_insert_ifa() for return values and ownership of @ifa.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
537
538 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
539 {
540         struct in_device *in_dev = __in_dev_get_rtnl(dev);
541
542         ASSERT_RTNL();
543
544         if (!in_dev) {
545                 inet_free_ifa(ifa);
546                 return -ENOBUFS;
547         }
548         ipv4_devconf_setall(in_dev);
549         neigh_parms_data_state_setall(in_dev->arp_parms);
550         if (ifa->ifa_dev != in_dev) {
551                 WARN_ON(ifa->ifa_dev);
552                 in_dev_hold(in_dev);
553                 ifa->ifa_dev = in_dev;
554         }
555         if (ipv4_is_loopback(ifa->ifa_local))
556                 ifa->ifa_scope = RT_SCOPE_HOST;
557         return inet_insert_ifa(ifa);
558 }
559
560 /* Caller must hold RCU or RTNL :
561  * We dont take a reference on found in_device
562  */
563 struct in_device *inetdev_by_index(struct net *net, int ifindex)
564 {
565         struct net_device *dev;
566         struct in_device *in_dev = NULL;
567
568         rcu_read_lock();
569         dev = dev_get_by_index_rcu(net, ifindex);
570         if (dev)
571                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
572         rcu_read_unlock();
573         return in_dev;
574 }
575 EXPORT_SYMBOL(inetdev_by_index);
576
577 /* Called only from RTNL semaphored context. No locks. */
578
579 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
580                                     __be32 mask)
581 {
582         ASSERT_RTNL();
583
584         for_primary_ifa(in_dev) {
585                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
586                         return ifa;
587         } endfor_ifa(in_dev);
588         return NULL;
589 }
590
591 static int ip_mc_autojoin_config(struct net *net, bool join,
592                                  const struct in_ifaddr *ifa)
593 {
594 #if defined(CONFIG_IP_MULTICAST)
595         struct ip_mreqn mreq = {
596                 .imr_multiaddr.s_addr = ifa->ifa_address,
597                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
598         };
599         struct sock *sk = net->ipv4.mc_autojoin_sk;
600         int ret;
601
602         ASSERT_RTNL();
603
604         lock_sock(sk);
605         if (join)
606                 ret = ip_mc_join_group(sk, &mreq);
607         else
608                 ret = ip_mc_leave_group(sk, &mreq);
609         release_sock(sk);
610
611         return ret;
612 #else
613         return -EOPNOTSUPP;
614 #endif
615 }
616
617 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
618                             struct netlink_ext_ack *extack)
619 {
620         struct net *net = sock_net(skb->sk);
621         struct nlattr *tb[IFA_MAX+1];
622         struct in_device *in_dev;
623         struct ifaddrmsg *ifm;
624         struct in_ifaddr *ifa, **ifap;
625         int err = -EINVAL;
626
627         ASSERT_RTNL();
628
629         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
630                           extack);
631         if (err < 0)
632                 goto errout;
633
634         ifm = nlmsg_data(nlh);
635         in_dev = inetdev_by_index(net, ifm->ifa_index);
636         if (!in_dev) {
637                 err = -ENODEV;
638                 goto errout;
639         }
640
641         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
642              ifap = &ifa->ifa_next) {
643                 if (tb[IFA_LOCAL] &&
644                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
645                         continue;
646
647                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
648                         continue;
649
650                 if (tb[IFA_ADDRESS] &&
651                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
652                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
653                         continue;
654
655                 if (ipv4_is_multicast(ifa->ifa_address))
656                         ip_mc_autojoin_config(net, false, ifa);
657                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
658                 return 0;
659         }
660
661         err = -EADDRNOTAVAIL;
662 errout:
663         return err;
664 }
665
/* Lifetime value meaning "never expires". */
#define INFINITY_LIFE_TIME      0xFFFFFFFF

/* Delayed-work handler that enforces address lifetimes.
 *
 * Each hash bucket is first scanned under rcu_read_lock() to find out
 * whether any address needs a change and to compute the next wake-up time.
 * Only when a change is needed is the bucket rescanned under rtnl_lock():
 * addresses past their valid lifetime are deleted, addresses past their
 * preferred lifetime are marked IFA_F_DEPRECATED.  IFA_F_PERMANENT
 * addresses are skipped.  Finally the work re-queues itself.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        /* Latest acceptable time for the next run; pulled earlier below. */
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan, no RTNL. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        /* age is in seconds (jiffies / HZ). */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Expired: must be deleted. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Past preferred lifetime: the next event
                                 * for this address is its expiry.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred: wake up when that ends. */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: apply the changes under RTNL. */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* inet_del_ifa() needs the link pointing
                                 * at ifa, so find it in the device list.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
765
766 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
767                              __u32 prefered_lft)
768 {
769         unsigned long timeout;
770
771         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
772
773         timeout = addrconf_timeout_fixup(valid_lft, HZ);
774         if (addrconf_finite_timeout(timeout))
775                 ifa->ifa_valid_lft = timeout;
776         else
777                 ifa->ifa_flags |= IFA_F_PERMANENT;
778
779         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
780         if (addrconf_finite_timeout(timeout)) {
781                 if (timeout == 0)
782                         ifa->ifa_flags |= IFA_F_DEPRECATED;
783                 ifa->ifa_preferred_lft = timeout;
784         }
785         ifa->ifa_tstamp = jiffies;
786         if (!ifa->ifa_cstamp)
787                 ifa->ifa_cstamp = ifa->ifa_tstamp;
788 }
789
/* Translate a netlink address request into a freshly allocated in_ifaddr.
 *
 * @net:           namespace used to resolve ifm->ifa_index
 * @nlh:           netlink message carrying a struct ifaddrmsg plus IFA_* attrs
 * @pvalid_lft:    out: set to ci->ifa_valid when IFA_CACHEINFO is present,
 *                 otherwise left untouched
 * @pprefered_lft: out: set to ci->ifa_prefered under the same condition
 *
 * Returns the new address, which has taken a reference on its in_device via
 * in_dev_hold(), or an ERR_PTR(): the nlmsg_parse() error, -EINVAL (prefix
 * length > 32, missing IFA_LOCAL, or bad IFA_CACHEINFO lifetimes), -ENODEV
 * (unknown ifindex) or -ENOBUFS (no in_device, or allocation failure).
 *
 * Uses the __dev_get_by_index()/__in_dev_get_rtnl() lookups; the caller
 * visible in this file asserts RTNL before calling.
 */
790 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
791                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
792 {
793         struct nlattr *tb[IFA_MAX+1];
794         struct in_ifaddr *ifa;
795         struct ifaddrmsg *ifm;
796         struct net_device *dev;
797         struct in_device *in_dev;
798         int err;
799
800         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
801                           NULL);
802         if (err < 0)
803                 goto errout;
804
805         ifm = nlmsg_data(nlh);
806         err = -EINVAL;
807         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
808                 goto errout;
809
810         dev = __dev_get_by_index(net, ifm->ifa_index);
811         err = -ENODEV;
812         if (!dev)
813                 goto errout;
814
815         in_dev = __in_dev_get_rtnl(dev);
816         err = -ENOBUFS;
817         if (!in_dev)
818                 goto errout;
819
        /* err is still -ENOBUFS here, so an allocation failure reports it too */
820         ifa = inet_alloc_ifa();
821         if (!ifa)
822                 /*
823                  * A potential indev allocation can be left alive, it stays
824                  * assigned to its device and is destroyed with it.
825                  */
826                 goto errout;
827
828         ipv4_devconf_setall(in_dev);
829         neigh_parms_data_state_setall(in_dev->arp_parms);
        /* reference backing the ifa->ifa_dev pointer assigned below */
830         in_dev_hold(in_dev);
831
        /* no explicit IFA_ADDRESS: use the local address for it as well */
832         if (!tb[IFA_ADDRESS])
833                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
834
835         INIT_HLIST_NODE(&ifa->hash);
836         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
837         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
        /* the IFA_FLAGS attribute, when present, overrides the header flags */
838         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
839                                          ifm->ifa_flags;
840         ifa->ifa_scope = ifm->ifa_scope;
841         ifa->ifa_dev = in_dev;
842
843         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
844         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
845
846         if (tb[IFA_BROADCAST])
847                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
848
        /* the label defaults to the device name */
849         if (tb[IFA_LABEL])
850                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
851         else
852                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
853
854         if (tb[IFA_RT_PRIORITY])
855                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
856
857         if (tb[IFA_CACHEINFO]) {
858                 struct ifa_cacheinfo *ci;
859
860                 ci = nla_data(tb[IFA_CACHEINFO]);
                /* reject a zero valid lifetime, or preferred > valid */
861                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
862                         err = -EINVAL;
863                         goto errout_free;
864                 }
865                 *pvalid_lft = ci->ifa_valid;
866                 *pprefered_lft = ci->ifa_prefered;
867         }
868
869         return ifa;
870
871 errout_free:
872         inet_free_ifa(ifa);
873 errout:
874         return ERR_PTR(err);
875 }
876
877 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
878 {
879         struct in_device *in_dev = ifa->ifa_dev;
880         struct in_ifaddr *ifa1, **ifap;
881
882         if (!ifa->ifa_local)
883                 return NULL;
884
885         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
886              ifap = &ifa1->ifa_next) {
887                 if (ifa1->ifa_mask == ifa->ifa_mask &&
888                     inet_ifa_match(ifa1->ifa_address, ifa) &&
889                     ifa1->ifa_local == ifa->ifa_local)
890                         return ifa1;
891         }
892         return NULL;
893 }
894
895 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
896                             struct netlink_ext_ack *extack)
897 {
898         struct net *net = sock_net(skb->sk);
899         struct in_ifaddr *ifa;
900         struct in_ifaddr *ifa_existing;
901         __u32 valid_lft = INFINITY_LIFE_TIME;
902         __u32 prefered_lft = INFINITY_LIFE_TIME;
903
904         ASSERT_RTNL();
905
906         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
907         if (IS_ERR(ifa))
908                 return PTR_ERR(ifa);
909
910         ifa_existing = find_matching_ifa(ifa);
911         if (!ifa_existing) {
912                 /* It would be best to check for !NLM_F_CREATE here but
913                  * userspace already relies on not having to provide this.
914                  */
915                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
916                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
917                         int ret = ip_mc_autojoin_config(net, true, ifa);
918
919                         if (ret < 0) {
920                                 inet_free_ifa(ifa);
921                                 return ret;
922                         }
923                 }
924                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
925                                          extack);
926         } else {
927                 u32 new_metric = ifa->ifa_rt_priority;
928
929                 inet_free_ifa(ifa);
930
931                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
932                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
933                         return -EEXIST;
934                 ifa = ifa_existing;
935
936                 if (ifa->ifa_rt_priority != new_metric) {
937                         fib_modify_prefix_metric(ifa, new_metric);
938                         ifa->ifa_rt_priority = new_metric;
939                 }
940
941                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
942                 cancel_delayed_work(&check_lifetime_work);
943                 queue_delayed_work(system_power_efficient_wq,
944                                 &check_lifetime_work, 0);
945                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
946         }
947         return 0;
948 }
949
950 /*
951  *      Determine a default network mask, based on the IP address.
952  */
953
954 static int inet_abc_len(__be32 addr)
955 {
956         int rc = -1;    /* Something else, probably a multicast. */
957
958         if (ipv4_is_zeronet(addr))
959                 rc = 0;
960         else {
961                 __u32 haddr = ntohl(addr);
962
963                 if (IN_CLASSA(haddr))
964                         rc = 8;
965                 else if (IN_CLASSB(haddr))
966                         rc = 16;
967                 else if (IN_CLASSC(haddr))
968                         rc = 24;
969         }
970
971         return rc;
972 }
973
974
/* Handle the IPv4 interface-address ioctls (SIOC[GS]IF{ADDR,BRDADDR,DSTADDR,
 * NETMASK} and SIOCSIFFLAGS) for the interface named in @ifr.
 *
 * @net: namespace in which the device is looked up
 * @cmd: ioctl number; anything not listed above yields -EINVAL
 * @ifr: request; ifr_name selects the device (optionally "dev:alias"), the
 *       sockaddr_in in ifr_addr is input for the set calls and is overwritten
 *       with the result for the get calls.  NOTE(review): @ifr is accessed
 *       directly here, so it is presumably a kernel copy made by the caller
 *       -- confirm.
 *
 * The set calls require CAP_NET_ADMIN in the namespace's user_ns (-EPERM) and,
 * except SIOCSIFFLAGS, an AF_INET address (-EINVAL).  The device lookup and
 * all list manipulation run under rtnl_lock().
 *
 * Returns 0 on success or a negative errno (-ENODEV for an unknown device,
 * -EADDRNOTAVAIL when no address matches the label).
 */
975 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
976 {
977         struct sockaddr_in sin_orig;
978         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
979         struct in_device *in_dev;
980         struct in_ifaddr **ifap = NULL;
981         struct in_ifaddr *ifa = NULL;
982         struct net_device *dev;
983         char *colon;
984         int ret = -EFAULT;
985         int tryaddrmatch = 0;
986
        /* force termination of the name before it is used as a string */
987         ifr->ifr_name[IFNAMSIZ - 1] = 0;
988
989         /* save original address for comparison */
990         memcpy(&sin_orig, sin, sizeof(*sin));
991
        /* Temporarily cut off an alias suffix so that dev_load() and the
         * device lookup see the bare device name; the ':' is put back after
         * the lookup so the full label can be compared below.
         */
992         colon = strchr(ifr->ifr_name, ':');
993         if (colon)
994                 *colon = 0;
995
996         dev_load(net, ifr->ifr_name);
997
998         switch (cmd) {
999         case SIOCGIFADDR:       /* Get interface address */
1000         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1001         case SIOCGIFDSTADDR:    /* Get the destination address */
1002         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1003                 /* Note that these ioctls will not sleep,
1004                    so that we do not impose a lock.
1005                    One day we will be forced to put shlock here (I mean SMP)
1006                  */
1007                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1008                 memset(sin, 0, sizeof(*sin));
1009                 sin->sin_family = AF_INET;
1010                 break;
1011
1012         case SIOCSIFFLAGS:
1013                 ret = -EPERM;
1014                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1015                         goto out;
1016                 break;
1017         case SIOCSIFADDR:       /* Set interface address (and family) */
1018         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1019         case SIOCSIFDSTADDR:    /* Set the destination address */
1020         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1021                 ret = -EPERM;
1022                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1023                         goto out;
1024                 ret = -EINVAL;
1025                 if (sin->sin_family != AF_INET)
1026                         goto out;
1027                 break;
1028         default:
1029                 ret = -EINVAL;
1030                 goto out;
1031         }
1032
1033         rtnl_lock();
1034
1035         ret = -ENODEV;
1036         dev = __dev_get_by_name(net, ifr->ifr_name);
1037         if (!dev)
1038                 goto done;
1039
1040         if (colon)
1041                 *colon = ':';
1042
1043         in_dev = __in_dev_get_rtnl(dev);
1044         if (in_dev) {
1045                 if (tryaddrmatch) {
1046                         /* Matthias Andree */
1047                         /* compare label and address (4.4BSD style) */
1048                         /* note: we only do this for a limited set of ioctls
1049                            and only if the original address family was AF_INET.
1050                            This is checked above. */
1051                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1052                              ifap = &ifa->ifa_next) {
1053                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1054                                     sin_orig.sin_addr.s_addr ==
1055                                                         ifa->ifa_local) {
1056                                         break; /* found */
1057                                 }
1058                         }
1059                 }
1060                 /* we didn't get a match, maybe the application is
1061                    4.3BSD-style and passed in junk so we fall back to
1062                    comparing just the label */
1063                 if (!ifa) {
1064                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1065                              ifap = &ifa->ifa_next)
1066                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1067                                         break;
1068                 }
1069         }
1070
        /* Only SIOCSIFADDR (which can create the address) and SIOCSIFFLAGS
         * (which can act on the device itself) may continue without a match.
         */
1071         ret = -EADDRNOTAVAIL;
1072         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1073                 goto done;
1074
1075         switch (cmd) {
1076         case SIOCGIFADDR:       /* Get interface address */
1077                 ret = 0;
1078                 sin->sin_addr.s_addr = ifa->ifa_local;
1079                 break;
1080
1081         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1082                 ret = 0;
1083                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1084                 break;
1085
1086         case SIOCGIFDSTADDR:    /* Get the destination address */
1087                 ret = 0;
1088                 sin->sin_addr.s_addr = ifa->ifa_address;
1089                 break;
1090
1091         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1092                 ret = 0;
1093                 sin->sin_addr.s_addr = ifa->ifa_mask;
1094                 break;
1095
1096         case SIOCSIFFLAGS:
                /* With an alias label, clearing IFF_UP deletes that one
                 * address and other flag values are accepted without effect;
                 * without an alias the flags are applied to the device.
                 */
1097                 if (colon) {
1098                         ret = -EADDRNOTAVAIL;
1099                         if (!ifa)
1100                                 break;
1101                         ret = 0;
1102                         if (!(ifr->ifr_flags & IFF_UP))
1103                                 inet_del_ifa(in_dev, ifap, 1);
1104                         break;
1105                 }
1106                 ret = dev_change_flags(dev, ifr->ifr_flags);
1107                 break;
1108
1109         case SIOCSIFADDR:       /* Set interface address (and family) */
1110                 ret = -EINVAL;
1111                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1112                         break;
1113
1114                 if (!ifa) {
1115                         ret = -ENOBUFS;
1116                         ifa = inet_alloc_ifa();
1117                         if (!ifa)
1118                                 break;
1119                         INIT_HLIST_NODE(&ifa->hash);
1120                         if (colon)
1121                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1122                         else
1123                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1124                 } else {
1125                         ret = 0;
1126                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1127                                 break;
                        /* unlink the entry, then reuse it for the new address */
1128                         inet_del_ifa(in_dev, ifap, 0);
1129                         ifa->ifa_broadcast = 0;
1130                         ifa->ifa_scope = 0;
1131                 }
1132
1133                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1134
                /* Not point-to-point: classful default mask, plus a derived
                 * broadcast address on broadcast devices when the prefix is
                 * shorter than /31.  Point-to-point devices get a /32.
                 */
1135                 if (!(dev->flags & IFF_POINTOPOINT)) {
1136                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1137                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1138                         if ((dev->flags & IFF_BROADCAST) &&
1139                             ifa->ifa_prefixlen < 31)
1140                                 ifa->ifa_broadcast = ifa->ifa_address |
1141                                                      ~ifa->ifa_mask;
1142                 } else {
1143                         ifa->ifa_prefixlen = 32;
1144                         ifa->ifa_mask = inet_make_mask(32);
1145                 }
1146                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1147                 ret = inet_set_ifa(dev, ifa);
1148                 break;
1149
1150         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1151                 ret = 0;
1152                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1153                         inet_del_ifa(in_dev, ifap, 0);
1154                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1155                         inet_insert_ifa(ifa);
1156                 }
1157                 break;
1158
1159         case SIOCSIFDSTADDR:    /* Set the destination address */
1160                 ret = 0;
1161                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1162                         break;
1163                 ret = -EINVAL;
1164                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1165                         break;
1166                 ret = 0;
1167                 inet_del_ifa(in_dev, ifap, 0);
1168                 ifa->ifa_address = sin->sin_addr.s_addr;
1169                 inet_insert_ifa(ifa);
1170                 break;
1171
1172         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1173
1174                 /*
1175                  *      The mask we set must be legal.
1176                  */
1177                 ret = -EINVAL;
1178                 if (bad_mask(sin->sin_addr.s_addr, 0))
1179                         break;
1180                 ret = 0;
1181                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1182                         __be32 old_mask = ifa->ifa_mask;
1183                         inet_del_ifa(in_dev, ifap, 0);
1184                         ifa->ifa_mask = sin->sin_addr.s_addr;
1185                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1186
1187                         /* See if current broadcast address matches
1188                          * with current netmask, then recalculate
1189                          * the broadcast address. Otherwise it's a
1190                          * funny address, so don't touch it since
1191                          * the user seems to know what (s)he's doing...
1192                          */
1193                         if ((dev->flags & IFF_BROADCAST) &&
1194                             (ifa->ifa_prefixlen < 31) &&
1195                             (ifa->ifa_broadcast ==
1196                              (ifa->ifa_local|~old_mask))) {
1197                                 ifa->ifa_broadcast = (ifa->ifa_local |
1198                                                       ~sin->sin_addr.s_addr);
1199                         }
1200                         inet_insert_ifa(ifa);
1201                 }
1202                 break;
1203         }
1204 done:
1205         rtnl_unlock();
1206 out:
1207         return ret;
1208 }
1209
1210 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1211 {
1212         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1213         struct in_ifaddr *ifa;
1214         struct ifreq ifr;
1215         int done = 0;
1216
1217         if (WARN_ON(size > sizeof(struct ifreq)))
1218                 goto out;
1219
1220         if (!in_dev)
1221                 goto out;
1222
1223         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1224                 if (!buf) {
1225                         done += size;
1226                         continue;
1227                 }
1228                 if (len < size)
1229                         break;
1230                 memset(&ifr, 0, sizeof(struct ifreq));
1231                 strcpy(ifr.ifr_name, ifa->ifa_label);
1232
1233                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1234                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1235                                                                 ifa->ifa_local;
1236
1237                 if (copy_to_user(buf + done, &ifr, size)) {
1238                         done = -EFAULT;
1239                         break;
1240                 }
1241                 len  -= size;
1242                 done += size;
1243         }
1244 out:
1245         return done;
1246 }
1247
1248 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1249                                  int scope)
1250 {
1251         for_primary_ifa(in_dev) {
1252                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1253                     ifa->ifa_scope <= scope)
1254                         return ifa->ifa_local;
1255         } endfor_ifa(in_dev);
1256
1257         return 0;
1258 }
1259
/* Select a local IPv4 address for @dev, falling back to related devices.
 *
 * @dev:   device whose addresses are tried first
 * @dst:   address the result should share a subnet with; 0 = no preference
 * @scope: largest acceptable ifa_scope value
 *
 * Stage 1 walks @dev with for_primary_ifa(), skipping entries whose scope
 * exceeds @scope: an entry matching @dst (or the first entry when @dst is 0)
 * wins immediately, otherwise the first in-scope entry is kept as fallback.
 * Stage 2, used when @dev yields nothing or has no in_device, asks
 * in_dev_select_addr() for the L3 master device (if any) and then for every
 * device in the namespace with the same L3 master index.
 *
 * Returns the chosen address, or 0 if none was found.  The whole lookup runs
 * inside rcu_read_lock()/rcu_read_unlock() taken here.
 */
1260 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1261 {
1262         __be32 addr = 0;
1263         struct in_device *in_dev;
1264         struct net *net = dev_net(dev);
1265         int master_idx;
1266
1267         rcu_read_lock();
1268         in_dev = __in_dev_get_rcu(dev);
1269         if (!in_dev)
1270                 goto no_in_dev;
1271
1272         for_primary_ifa(in_dev) {
1273                 if (ifa->ifa_scope > scope)
1274                         continue;
1275                 if (!dst || inet_ifa_match(dst, ifa)) {
1276                         addr = ifa->ifa_local;
1277                         break;
1278                 }
                /* remember the first in-scope address as a fallback */
1279                 if (!addr)
1280                         addr = ifa->ifa_local;
1281         } endfor_ifa(in_dev);
1282
1283         if (addr)
1284                 goto out_unlock;
1285 no_in_dev:
1286         master_idx = l3mdev_master_ifindex_rcu(dev);
1287
1288         /* For VRFs, the VRF device takes the place of the loopback device,
1289          * with addresses on it being preferred.  Note in such cases the
1290          * loopback device will be among the devices that fail the master_idx
1291          * equality check in the loop below.
1292          */
1293         if (master_idx &&
1294             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1295             (in_dev = __in_dev_get_rcu(dev))) {
1296                 addr = in_dev_select_addr(in_dev, scope);
1297                 if (addr)
1298                         goto out_unlock;
1299         }
1300
1301         /* Not loopback addresses on loopback should be preferred
1302            in this case. It is important that lo is the first interface
1303            in dev_base list.
1304          */
1305         for_each_netdev_rcu(net, dev) {
1306                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1307                         continue;
1308
1309                 in_dev = __in_dev_get_rcu(dev);
1310                 if (!in_dev)
1311                         continue;
1312
1313                 addr = in_dev_select_addr(in_dev, scope);
1314                 if (addr)
1315                         goto out_unlock;
1316         }
1317 out_unlock:
1318         rcu_read_unlock();
1319         return addr;
1320 }
1321 EXPORT_SYMBOL(inet_select_addr);
1322
/* Per-device worker for inet_confirm_addr().
 *
 * Walks @in_dev with for_ifa() tracking two things:
 *   addr - a candidate local address: the first entry with ifa_scope <= @scope
 *          whose ifa_local equals @local (any entry when @local is 0);
 *   same - whether an entry has been seen whose subnet contains @local (if
 *          non-zero) and @dst (if non-zero).
 *
 * Returns the candidate only if such a subnet entry was found, otherwise 0.
 */
1323 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1324                               __be32 local, int scope)
1325 {
1326         int same = 0;
1327         __be32 addr = 0;
1328
1329         for_ifa(in_dev) {
1330                 if (!addr &&
1331                     (local == ifa->ifa_local || !local) &&
1332                     ifa->ifa_scope <= scope) {
1333                         addr = ifa->ifa_local;
                        /* subnet already confirmed by an earlier entry */
1334                         if (same)
1335                                 break;
1336                 }
1337                 if (!same) {
1338                         same = (!local || inet_ifa_match(local, ifa)) &&
1339                                 (!dst || inet_ifa_match(dst, ifa));
1340                         if (same && addr) {
                                /* Nothing more to verify unless the address
                                 * was auto-selected (local == 0) and a dst
                                 * subnet was requested.
                                 */
1341                                 if (local || !dst)
1342                                         break;
1343                                 /* Is the selected addr into dst subnet? */
1344                                 if (inet_ifa_match(addr, ifa))
1345                                         break;
1346                                 /* No, then can we use new local src? */
1347                                 if (ifa->ifa_scope <= scope) {
1348                                         addr = ifa->ifa_local;
1349                                         break;
1350                                 }
1351                                 /* search for large dst subnet for addr */
1352                                 same = 0;
1353                         }
1354                 }
1355         } endfor_ifa(in_dev);
1356
1357         return same ? addr : 0;
1358 }
1359
1360 /*
1361  * Confirm that local IP address exists using wildcards:
1362  * - net: netns to check, cannot be NULL
1363  * - in_dev: only on this interface, NULL=any interface
1364  * - dst: only in the same subnet as dst, 0=any dst
1365  * - local: address, 0=autoselect the local address
1366  * - scope: maximum allowed scope value for the local address
1367  */
1368 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1369                          __be32 dst, __be32 local, int scope)
1370 {
1371         __be32 addr = 0;
1372         struct net_device *dev;
1373
1374         if (in_dev)
1375                 return confirm_addr_indev(in_dev, dst, local, scope);
1376
1377         rcu_read_lock();
1378         for_each_netdev_rcu(net, dev) {
1379                 in_dev = __in_dev_get_rcu(dev);
1380                 if (in_dev) {
1381                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1382                         if (addr)
1383                                 break;
1384                 }
1385         }
1386         rcu_read_unlock();
1387
1388         return addr;
1389 }
1390 EXPORT_SYMBOL(inet_confirm_addr);
1391
1392 /*
1393  *      Device notifier
1394  */
1395
1396 int register_inetaddr_notifier(struct notifier_block *nb)
1397 {
1398         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1399 }
1400 EXPORT_SYMBOL(register_inetaddr_notifier);
1401
1402 int unregister_inetaddr_notifier(struct notifier_block *nb)
1403 {
1404         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1405 }
1406 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1407
1408 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1409 {
1410         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1411 }
1412 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1413
1414 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1415 {
1416         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1417             nb);
1418 }
1419 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1420
1421 /* Rename ifa_labels for a device name change. Make some effort to preserve
1422  * existing alias numbering and to create unique labels if possible.
 *
 * The first address on the list takes the bare new device name.  Every later
 * address keeps the ":suffix" of its old label, or gets ":<n>" (n being its
 * 1-based position in the list) when the old label had none.  If name plus
 * suffix would not fit in IFNAMSIZ, the suffix is written over the tail of
 * the label instead of being appended.  rtmsg_ifa(RTM_NEWADDR, ...) is called
 * for every address.
1423 */
1424 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1425 {
1426         struct in_ifaddr *ifa;
1427         int named = 0;
1428
1429         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1430                 char old[IFNAMSIZ], *dot;
1431
                /* keep the old label so its alias suffix can be reused */
1432                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1433                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                /* the first address keeps the plain device name */
1434                 if (named++ == 0)
1435                         goto skip;
1436                 dot = strchr(old, ':');
1437                 if (!dot) {
                        /* no alias suffix before: build one in 'old' */
1438                         sprintf(old, ":%d", named);
1439                         dot = old;
1440                 }
1441                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1442                         strcat(ifa->ifa_label, dot);
1443                 else
                        /* too long: place the suffix at the very end */
1444                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1445 skip:
1446                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1447         }
1448 }
1449
1450 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1451                                         struct in_device *in_dev)
1452
1453 {
1454         struct in_ifaddr *ifa;
1455
1456         for (ifa = in_dev->ifa_list; ifa;
1457              ifa = ifa->ifa_next) {
1458                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1459                          ifa->ifa_local, dev,
1460                          ifa->ifa_local, NULL,
1461                          dev->dev_addr, NULL);
1462         }
1463 }
1464
1465 /* Called only under RTNL semaphore */
1466
/* Netdevice notifier callback maintaining the IPv4 state (in_device) of a
 * device.
 *
 * @this:  notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Without an in_device only two events do anything: NETDEV_REGISTER creates
 * one (a failure is returned through notifier_from_errno()), and
 * NETDEV_CHANGEMTU re-creates it once the MTU is valid again (the result of
 * that inetdev_init() call is not checked).  With an in_device the event is
 * dispatched by the switch below.
 *
 * Returns NOTIFY_DONE in every case except the NETDEV_REGISTER failure.
 */
1467 static int inetdev_event(struct notifier_block *this, unsigned long event,
1468                          void *ptr)
1469 {
1470         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1471         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1472
1473         ASSERT_RTNL();
1474
1475         if (!in_dev) {
1476                 if (event == NETDEV_REGISTER) {
1477                         in_dev = inetdev_init(dev);
1478                         if (IS_ERR(in_dev))
1479                                 return notifier_from_errno(PTR_ERR(in_dev));
1480                         if (dev->flags & IFF_LOOPBACK) {
1481                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1482                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1483                         }
1484                 } else if (event == NETDEV_CHANGEMTU) {
1485                         /* Re-enabling IP */
1486                         if (inetdev_valid_mtu(dev->mtu))
1487                                 in_dev = inetdev_init(dev);
1488                 }
1489                 goto out;
1490         }
1491
1492         switch (event) {
1493         case NETDEV_REGISTER:
                /* an in_device already exists at registration: unexpected */
1494                 pr_debug("%s: bug\n", __func__);
1495                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1496                 break;
1497         case NETDEV_UP:
1498                 if (!inetdev_valid_mtu(dev->mtu))
1499                         break;
                /* Loopback devices get INADDR_LOOPBACK/8 with host scope and
                 * infinite lifetimes when they come up; an allocation failure
                 * is silently tolerated.
                 */
1500                 if (dev->flags & IFF_LOOPBACK) {
1501                         struct in_ifaddr *ifa = inet_alloc_ifa();
1502
1503                         if (ifa) {
1504                                 INIT_HLIST_NODE(&ifa->hash);
1505                                 ifa->ifa_local =
1506                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1507                                 ifa->ifa_prefixlen = 8;
1508                                 ifa->ifa_mask = inet_make_mask(8);
1509                                 in_dev_hold(in_dev);
1510                                 ifa->ifa_dev = in_dev;
1511                                 ifa->ifa_scope = RT_SCOPE_HOST;
1512                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1513                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1514                                                  INFINITY_LIFE_TIME);
1515                                 ipv4_devconf_setall(in_dev);
1516                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1517                                 inet_insert_ifa(ifa);
1518                         }
1519                 }
1520                 ip_mc_up(in_dev);
1521                 /* fall through */
1522         case NETDEV_CHANGEADDR:
                /* UP and CHANGEADDR announce only if arp_notify is enabled */
1523                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1524                         break;
1525                 /* fall through */
1526         case NETDEV_NOTIFY_PEERS:
1527                 /* Send gratuitous ARP to notify of link change */
1528                 inetdev_send_gratuitous_arp(dev, in_dev);
1529                 break;
1530         case NETDEV_DOWN:
1531                 ip_mc_down(in_dev);
1532                 break;
1533         case NETDEV_PRE_TYPE_CHANGE:
1534                 ip_mc_unmap(in_dev);
1535                 break;
1536         case NETDEV_POST_TYPE_CHANGE:
1537                 ip_mc_remap(in_dev);
1538                 break;
1539         case NETDEV_CHANGEMTU:
1540                 if (inetdev_valid_mtu(dev->mtu))
1541                         break;
1542                 /* disable IP when MTU is not enough */
1543                 /* fall through */
1544         case NETDEV_UNREGISTER:
1545                 inetdev_destroy(in_dev);
1546                 break;
1547         case NETDEV_CHANGENAME:
1548                 /* Do not notify about label change, this event is
1549                  * not interesting to applications using netlink.
                 *
                 * NOTE(review): inetdev_changename() as it appears in this
                 * file calls rtmsg_ifa(RTM_NEWADDR, ...) for every address,
                 * so the statement above looks stale -- confirm.
1550                  */
1551                 inetdev_changename(dev, in_dev);
1552
                /* re-register the sysctl entries after the rename */
1553                 devinet_sysctl_unregister(in_dev);
1554                 devinet_sysctl_register(in_dev);
1555                 break;
1556         }
1557 out:
1558         return NOTIFY_DONE;
1559 }
1560
/* Notifier block whose callback is inetdev_event().  Presumably registered on
 * the netdevice notifier chain elsewhere in this file -- confirm.
 */
1561 static struct notifier_block ip_netdev_notifier = {
1562         .notifier_call = inetdev_event,
1563 };
1564
1565 static size_t inet_nlmsg_size(void)
1566 {
1567         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1568                + nla_total_size(4) /* IFA_ADDRESS */
1569                + nla_total_size(4) /* IFA_LOCAL */
1570                + nla_total_size(4) /* IFA_BROADCAST */
1571                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1572                + nla_total_size(4)  /* IFA_FLAGS */
1573                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1574                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1575 }
1576
1577 static inline u32 cstamp_delta(unsigned long cstamp)
1578 {
1579         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1580 }
1581
1582 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1583                          unsigned long tstamp, u32 preferred, u32 valid)
1584 {
1585         struct ifa_cacheinfo ci;
1586
1587         ci.cstamp = cstamp_delta(cstamp);
1588         ci.tstamp = cstamp_delta(tstamp);
1589         ci.ifa_prefered = preferred;
1590         ci.ifa_valid = valid;
1591
1592         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1593 }
1594
/* Serialize @ifa into @skb as one netlink address message.
 *
 * @skb:    buffer being filled
 * @ifa:    address to describe
 * @portid: netlink port id for the message header
 * @seq:    sequence number for the message header
 * @event:  message type passed to nlmsg_put()
 * @flags:  netlink message flags
 *
 * The header carries family AF_INET, prefix length, flags, scope and the
 * ifindex of the owning device.  IFA_ADDRESS, IFA_LOCAL, IFA_BROADCAST and
 * IFA_RT_PRIORITY are emitted only when non-zero, IFA_LABEL only when the
 * label is non-empty; IFA_FLAGS and IFA_CACHEINFO are always emitted.
 *
 * Returns 0 on success, or -EMSGSIZE if the header or any attribute does not
 * fit, in which case the partial message is cancelled.
 */
1595 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1596                             u32 portid, u32 seq, int event, unsigned int flags)
1597 {
1598         struct ifaddrmsg *ifm;
1599         struct nlmsghdr  *nlh;
1600         u32 preferred, valid;
1601
1602         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1603         if (!nlh)
1604                 return -EMSGSIZE;
1605
1606         ifm = nlmsg_data(nlh);
1607         ifm->ifa_family = AF_INET;
1608         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1609         ifm->ifa_flags = ifa->ifa_flags;
1610         ifm->ifa_scope = ifa->ifa_scope;
1611         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1612
        /* Non-permanent addresses report the remaining lifetimes: the seconds
         * elapsed since ifa_tstamp are subtracted, clamping at 0.  Note that
         * the valid lifetime is only adjusted inside the branch taken when
         * the preferred lifetime is finite.
         */
1613         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1614                 preferred = ifa->ifa_preferred_lft;
1615                 valid = ifa->ifa_valid_lft;
1616                 if (preferred != INFINITY_LIFE_TIME) {
1617                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1618
1619                         if (preferred > tval)
1620                                 preferred -= tval;
1621                         else
1622                                 preferred = 0;
1623                         if (valid != INFINITY_LIFE_TIME) {
1624                                 if (valid > tval)
1625                                         valid -= tval;
1626                                 else
1627                                         valid = 0;
1628                         }
1629                 }
1630         } else {
1631                 preferred = INFINITY_LIFE_TIME;
1632                 valid = INFINITY_LIFE_TIME;
1633         }
        /* Short-circuit chain: the first nla_put*() that fails aborts the
         * message.
         */
1634         if ((ifa->ifa_address &&
1635              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1636             (ifa->ifa_local &&
1637              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1638             (ifa->ifa_broadcast &&
1639              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1640             (ifa->ifa_label[0] &&
1641              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1642             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1643             (ifa->ifa_rt_priority &&
1644              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1645             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1646                           preferred, valid))
1647                 goto nla_put_failure;
1648
1649         nlmsg_end(skb, nlh);
1650         return 0;
1651
1652 nla_put_failure:
1653         nlmsg_cancel(skb, nlh);
1654         return -EMSGSIZE;
1655 }
1656
1657 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1658 {
1659         struct net *net = sock_net(skb->sk);
1660         int h, s_h;
1661         int idx, s_idx;
1662         int ip_idx, s_ip_idx;
1663         struct net_device *dev;
1664         struct in_device *in_dev;
1665         struct in_ifaddr *ifa;
1666         struct hlist_head *head;
1667
1668         s_h = cb->args[0];
1669         s_idx = idx = cb->args[1];
1670         s_ip_idx = ip_idx = cb->args[2];
1671
1672         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1673                 idx = 0;
1674                 head = &net->dev_index_head[h];
1675                 rcu_read_lock();
1676                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1677                           net->dev_base_seq;
1678                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1679                         if (idx < s_idx)
1680                                 goto cont;
1681                         if (h > s_h || idx > s_idx)
1682                                 s_ip_idx = 0;
1683                         in_dev = __in_dev_get_rcu(dev);
1684                         if (!in_dev)
1685                                 goto cont;
1686
1687                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1688                              ifa = ifa->ifa_next, ip_idx++) {
1689                                 if (ip_idx < s_ip_idx)
1690                                         continue;
1691                                 if (inet_fill_ifaddr(skb, ifa,
1692                                              NETLINK_CB(cb->skb).portid,
1693                                              cb->nlh->nlmsg_seq,
1694                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1695                                         rcu_read_unlock();
1696                                         goto done;
1697                                 }
1698                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1699                         }
1700 cont:
1701                         idx++;
1702                 }
1703                 rcu_read_unlock();
1704         }
1705
1706 done:
1707         cb->args[0] = h;
1708         cb->args[1] = idx;
1709         cb->args[2] = ip_idx;
1710
1711         return skb->len;
1712 }
1713
1714 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1715                       u32 portid)
1716 {
1717         struct sk_buff *skb;
1718         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1719         int err = -ENOBUFS;
1720         struct net *net;
1721
1722         net = dev_net(ifa->ifa_dev->dev);
1723         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1724         if (!skb)
1725                 goto errout;
1726
1727         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1728         if (err < 0) {
1729                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1730                 WARN_ON(err == -EMSGSIZE);
1731                 kfree_skb(skb);
1732                 goto errout;
1733         }
1734         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1735         return;
1736 errout:
1737         if (err < 0)
1738                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1739 }
1740
1741 static size_t inet_get_link_af_size(const struct net_device *dev,
1742                                     u32 ext_filter_mask)
1743 {
1744         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1745
1746         if (!in_dev)
1747                 return 0;
1748
1749         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1750 }
1751
1752 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1753                              u32 ext_filter_mask)
1754 {
1755         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1756         struct nlattr *nla;
1757         int i;
1758
1759         if (!in_dev)
1760                 return -ENODATA;
1761
1762         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1763         if (!nla)
1764                 return -EMSGSIZE;
1765
1766         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1767                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1768
1769         return 0;
1770 }
1771
1772 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1773         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1774 };
1775
1776 static int inet_validate_link_af(const struct net_device *dev,
1777                                  const struct nlattr *nla)
1778 {
1779         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780         int err, rem;
1781
1782         if (dev && !__in_dev_get_rcu(dev))
1783                 return -EAFNOSUPPORT;
1784
1785         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1786         if (err < 0)
1787                 return err;
1788
1789         if (tb[IFLA_INET_CONF]) {
1790                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1791                         int cfgid = nla_type(a);
1792
1793                         if (nla_len(a) < 4)
1794                                 return -EINVAL;
1795
1796                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1797                                 return -EINVAL;
1798                 }
1799         }
1800
1801         return 0;
1802 }
1803
1804 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1805 {
1806         struct in_device *in_dev = __in_dev_get_rcu(dev);
1807         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1808         int rem;
1809
1810         if (!in_dev)
1811                 return -EAFNOSUPPORT;
1812
1813         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1814                 BUG();
1815
1816         if (tb[IFLA_INET_CONF]) {
1817                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1818                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1819         }
1820
1821         return 0;
1822 }
1823
1824 static int inet_netconf_msgsize_devconf(int type)
1825 {
1826         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1827                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1828         bool all = false;
1829
1830         if (type == NETCONFA_ALL)
1831                 all = true;
1832
1833         if (all || type == NETCONFA_FORWARDING)
1834                 size += nla_total_size(4);
1835         if (all || type == NETCONFA_RP_FILTER)
1836                 size += nla_total_size(4);
1837         if (all || type == NETCONFA_MC_FORWARDING)
1838                 size += nla_total_size(4);
1839         if (all || type == NETCONFA_BC_FORWARDING)
1840                 size += nla_total_size(4);
1841         if (all || type == NETCONFA_PROXY_NEIGH)
1842                 size += nla_total_size(4);
1843         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1844                 size += nla_total_size(4);
1845
1846         return size;
1847 }
1848
1849 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1850                                      struct ipv4_devconf *devconf, u32 portid,
1851                                      u32 seq, int event, unsigned int flags,
1852                                      int type)
1853 {
1854         struct nlmsghdr  *nlh;
1855         struct netconfmsg *ncm;
1856         bool all = false;
1857
1858         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1859                         flags);
1860         if (!nlh)
1861                 return -EMSGSIZE;
1862
1863         if (type == NETCONFA_ALL)
1864                 all = true;
1865
1866         ncm = nlmsg_data(nlh);
1867         ncm->ncm_family = AF_INET;
1868
1869         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1870                 goto nla_put_failure;
1871
1872         if (!devconf)
1873                 goto out;
1874
1875         if ((all || type == NETCONFA_FORWARDING) &&
1876             nla_put_s32(skb, NETCONFA_FORWARDING,
1877                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1878                 goto nla_put_failure;
1879         if ((all || type == NETCONFA_RP_FILTER) &&
1880             nla_put_s32(skb, NETCONFA_RP_FILTER,
1881                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1882                 goto nla_put_failure;
1883         if ((all || type == NETCONFA_MC_FORWARDING) &&
1884             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1885                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1886                 goto nla_put_failure;
1887         if ((all || type == NETCONFA_BC_FORWARDING) &&
1888             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
1889                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
1890                 goto nla_put_failure;
1891         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1892             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1893                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1894                 goto nla_put_failure;
1895         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1896             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1897                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1898                 goto nla_put_failure;
1899
1900 out:
1901         nlmsg_end(skb, nlh);
1902         return 0;
1903
1904 nla_put_failure:
1905         nlmsg_cancel(skb, nlh);
1906         return -EMSGSIZE;
1907 }
1908
1909 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1910                                  int ifindex, struct ipv4_devconf *devconf)
1911 {
1912         struct sk_buff *skb;
1913         int err = -ENOBUFS;
1914
1915         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1916         if (!skb)
1917                 goto errout;
1918
1919         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1920                                         event, 0, type);
1921         if (err < 0) {
1922                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1923                 WARN_ON(err == -EMSGSIZE);
1924                 kfree_skb(skb);
1925                 goto errout;
1926         }
1927         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1928         return;
1929 errout:
1930         if (err < 0)
1931                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1932 }
1933
1934 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1935         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1936         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1937         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1938         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1939         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1940 };
1941
1942 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1943                                     struct nlmsghdr *nlh,
1944                                     struct netlink_ext_ack *extack)
1945 {
1946         struct net *net = sock_net(in_skb->sk);
1947         struct nlattr *tb[NETCONFA_MAX+1];
1948         struct netconfmsg *ncm;
1949         struct sk_buff *skb;
1950         struct ipv4_devconf *devconf;
1951         struct in_device *in_dev;
1952         struct net_device *dev;
1953         int ifindex;
1954         int err;
1955
1956         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1957                           devconf_ipv4_policy, extack);
1958         if (err < 0)
1959                 goto errout;
1960
1961         err = -EINVAL;
1962         if (!tb[NETCONFA_IFINDEX])
1963                 goto errout;
1964
1965         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1966         switch (ifindex) {
1967         case NETCONFA_IFINDEX_ALL:
1968                 devconf = net->ipv4.devconf_all;
1969                 break;
1970         case NETCONFA_IFINDEX_DEFAULT:
1971                 devconf = net->ipv4.devconf_dflt;
1972                 break;
1973         default:
1974                 dev = __dev_get_by_index(net, ifindex);
1975                 if (!dev)
1976                         goto errout;
1977                 in_dev = __in_dev_get_rtnl(dev);
1978                 if (!in_dev)
1979                         goto errout;
1980                 devconf = &in_dev->cnf;
1981                 break;
1982         }
1983
1984         err = -ENOBUFS;
1985         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1986         if (!skb)
1987                 goto errout;
1988
1989         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1990                                         NETLINK_CB(in_skb).portid,
1991                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1992                                         NETCONFA_ALL);
1993         if (err < 0) {
1994                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1995                 WARN_ON(err == -EMSGSIZE);
1996                 kfree_skb(skb);
1997                 goto errout;
1998         }
1999         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2000 errout:
2001         return err;
2002 }
2003
2004 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2005                                      struct netlink_callback *cb)
2006 {
2007         struct net *net = sock_net(skb->sk);
2008         int h, s_h;
2009         int idx, s_idx;
2010         struct net_device *dev;
2011         struct in_device *in_dev;
2012         struct hlist_head *head;
2013
2014         s_h = cb->args[0];
2015         s_idx = idx = cb->args[1];
2016
2017         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2018                 idx = 0;
2019                 head = &net->dev_index_head[h];
2020                 rcu_read_lock();
2021                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2022                           net->dev_base_seq;
2023                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2024                         if (idx < s_idx)
2025                                 goto cont;
2026                         in_dev = __in_dev_get_rcu(dev);
2027                         if (!in_dev)
2028                                 goto cont;
2029
2030                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2031                                                       &in_dev->cnf,
2032                                                       NETLINK_CB(cb->skb).portid,
2033                                                       cb->nlh->nlmsg_seq,
2034                                                       RTM_NEWNETCONF,
2035                                                       NLM_F_MULTI,
2036                                                       NETCONFA_ALL) < 0) {
2037                                 rcu_read_unlock();
2038                                 goto done;
2039                         }
2040                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2041 cont:
2042                         idx++;
2043                 }
2044                 rcu_read_unlock();
2045         }
2046         if (h == NETDEV_HASHENTRIES) {
2047                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2048                                               net->ipv4.devconf_all,
2049                                               NETLINK_CB(cb->skb).portid,
2050                                               cb->nlh->nlmsg_seq,
2051                                               RTM_NEWNETCONF, NLM_F_MULTI,
2052                                               NETCONFA_ALL) < 0)
2053                         goto done;
2054                 else
2055                         h++;
2056         }
2057         if (h == NETDEV_HASHENTRIES + 1) {
2058                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2059                                               net->ipv4.devconf_dflt,
2060                                               NETLINK_CB(cb->skb).portid,
2061                                               cb->nlh->nlmsg_seq,
2062                                               RTM_NEWNETCONF, NLM_F_MULTI,
2063                                               NETCONFA_ALL) < 0)
2064                         goto done;
2065                 else
2066                         h++;
2067         }
2068 done:
2069         cb->args[0] = h;
2070         cb->args[1] = idx;
2071
2072         return skb->len;
2073 }
2074
2075 #ifdef CONFIG_SYSCTL
2076
2077 static void devinet_copy_dflt_conf(struct net *net, int i)
2078 {
2079         struct net_device *dev;
2080
2081         rcu_read_lock();
2082         for_each_netdev_rcu(net, dev) {
2083                 struct in_device *in_dev;
2084
2085                 in_dev = __in_dev_get_rcu(dev);
2086                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2087                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2088         }
2089         rcu_read_unlock();
2090 }
2091
2092 /* called with RTNL locked */
2093 static void inet_forward_change(struct net *net)
2094 {
2095         struct net_device *dev;
2096         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2097
2098         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2099         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2100         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2101                                     NETCONFA_FORWARDING,
2102                                     NETCONFA_IFINDEX_ALL,
2103                                     net->ipv4.devconf_all);
2104         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2105                                     NETCONFA_FORWARDING,
2106                                     NETCONFA_IFINDEX_DEFAULT,
2107                                     net->ipv4.devconf_dflt);
2108
2109         for_each_netdev(net, dev) {
2110                 struct in_device *in_dev;
2111
2112                 if (on)
2113                         dev_disable_lro(dev);
2114
2115                 in_dev = __in_dev_get_rtnl(dev);
2116                 if (in_dev) {
2117                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2118                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2119                                                     NETCONFA_FORWARDING,
2120                                                     dev->ifindex, &in_dev->cnf);
2121                 }
2122         }
2123 }
2124
2125 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2126 {
2127         if (cnf == net->ipv4.devconf_dflt)
2128                 return NETCONFA_IFINDEX_DEFAULT;
2129         else if (cnf == net->ipv4.devconf_all)
2130                 return NETCONFA_IFINDEX_ALL;
2131         else {
2132                 struct in_device *idev
2133                         = container_of(cnf, struct in_device, cnf);
2134                 return idev->dev->ifindex;
2135         }
2136 }
2137
2138 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2139                              void __user *buffer,
2140                              size_t *lenp, loff_t *ppos)
2141 {
2142         int old_value = *(int *)ctl->data;
2143         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2144         int new_value = *(int *)ctl->data;
2145
2146         if (write) {
2147                 struct ipv4_devconf *cnf = ctl->extra1;
2148                 struct net *net = ctl->extra2;
2149                 int i = (int *)ctl->data - cnf->data;
2150                 int ifindex;
2151
2152                 set_bit(i, cnf->state);
2153
2154                 if (cnf == net->ipv4.devconf_dflt)
2155                         devinet_copy_dflt_conf(net, i);
2156                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2157                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2158                         if ((new_value == 0) && (old_value != 0))
2159                                 rt_cache_flush(net);
2160
2161                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2162                     new_value != old_value)
2163                         rt_cache_flush(net);
2164
2165                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2166                     new_value != old_value) {
2167                         ifindex = devinet_conf_ifindex(net, cnf);
2168                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2169                                                     NETCONFA_RP_FILTER,
2170                                                     ifindex, cnf);
2171                 }
2172                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2173                     new_value != old_value) {
2174                         ifindex = devinet_conf_ifindex(net, cnf);
2175                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2176                                                     NETCONFA_PROXY_NEIGH,
2177                                                     ifindex, cnf);
2178                 }
2179                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2180                     new_value != old_value) {
2181                         ifindex = devinet_conf_ifindex(net, cnf);
2182                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2183                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2184                                                     ifindex, cnf);
2185                 }
2186         }
2187
2188         return ret;
2189 }
2190
2191 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2192                                   void __user *buffer,
2193                                   size_t *lenp, loff_t *ppos)
2194 {
2195         int *valp = ctl->data;
2196         int val = *valp;
2197         loff_t pos = *ppos;
2198         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2199
2200         if (write && *valp != val) {
2201                 struct net *net = ctl->extra2;
2202
2203                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2204                         if (!rtnl_trylock()) {
2205                                 /* Restore the original values before restarting */
2206                                 *valp = val;
2207                                 *ppos = pos;
2208                                 return restart_syscall();
2209                         }
2210                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2211                                 inet_forward_change(net);
2212                         } else {
2213                                 struct ipv4_devconf *cnf = ctl->extra1;
2214                                 struct in_device *idev =
2215                                         container_of(cnf, struct in_device, cnf);
2216                                 if (*valp)
2217                                         dev_disable_lro(idev->dev);
2218                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2219                                                             NETCONFA_FORWARDING,
2220                                                             idev->dev->ifindex,
2221                                                             cnf);
2222                         }
2223                         rtnl_unlock();
2224                         rt_cache_flush(net);
2225                 } else
2226                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2227                                                     NETCONFA_FORWARDING,
2228                                                     NETCONFA_IFINDEX_DEFAULT,
2229                                                     net->ipv4.devconf_dflt);
2230         }
2231
2232         return ret;
2233 }
2234
2235 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2236                                 void __user *buffer,
2237                                 size_t *lenp, loff_t *ppos)
2238 {
2239         int *valp = ctl->data;
2240         int val = *valp;
2241         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2242         struct net *net = ctl->extra2;
2243
2244         if (write && *valp != val)
2245                 rt_cache_flush(net);
2246
2247         return ret;
2248 }
2249
/*
 * Template for one ctl_table entry.  .data and .extra1 point into the global
 * ipv4_devconf; @attr selects the slot IPV4_DEVCONF_<attr> - 1 of its data
 * array.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .mode           = mval, \
                .maxlen         = sizeof(int), \
                .data           = &ipv4_devconf.data[ \
                                        IPV4_DEVCONF_ ## attr - 1], \
                .extra1         = &ipv4_devconf, \
                .proc_handler   = proc, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
2272
2273 static struct devinet_sysctl_table {
2274         struct ctl_table_header *sysctl_header;
2275         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2276 } devinet_sysctl = {
2277         .devinet_vars = {
2278                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2279                                              devinet_sysctl_forward),
2280                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2281                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2282
2283                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2284                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2285                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2286                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2287                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2288                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2289                                         "accept_source_route"),
2290                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2291                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2292                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2293                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2294                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2295                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2296                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2297                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2298                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2299                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2300                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2301                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2302                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2303                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2304                                         "force_igmp_version"),
2305                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2306                                         "igmpv2_unsolicited_report_interval"),
2307                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2308                                         "igmpv3_unsolicited_report_interval"),
2309                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2310                                         "ignore_routes_with_linkdown"),
2311                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2312                                         "drop_gratuitous_arp"),
2313
2314                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2315                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2316                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2317                                               "promote_secondaries"),
2318                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2319                                               "route_localnet"),
2320                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2321                                               "drop_unicast_in_l2_multicast"),
2322         },
2323 };
2324
2325 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2326                                      int ifindex, struct ipv4_devconf *p)
2327 {
2328         int i;
2329         struct devinet_sysctl_table *t;
2330         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2331
2332         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2333         if (!t)
2334                 goto out;
2335
2336         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2337                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2338                 t->devinet_vars[i].extra1 = p;
2339                 t->devinet_vars[i].extra2 = net;
2340         }
2341
2342         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2343
2344         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2345         if (!t->sysctl_header)
2346                 goto free;
2347
2348         p->sysctl = t;
2349
2350         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2351                                     ifindex, p);
2352         return 0;
2353
2354 free:
2355         kfree(t);
2356 out:
2357         return -ENOBUFS;
2358 }
2359
2360 static void __devinet_sysctl_unregister(struct net *net,
2361                                         struct ipv4_devconf *cnf, int ifindex)
2362 {
2363         struct devinet_sysctl_table *t = cnf->sysctl;
2364
2365         if (t) {
2366                 cnf->sysctl = NULL;
2367                 unregister_net_sysctl_table(t->sysctl_header);
2368                 kfree(t);
2369         }
2370
2371         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2372 }
2373
2374 static int devinet_sysctl_register(struct in_device *idev)
2375 {
2376         int err;
2377
2378         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2379                 return -EINVAL;
2380
2381         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2382         if (err)
2383                 return err;
2384         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2385                                         idev->dev->ifindex, &idev->cnf);
2386         if (err)
2387                 neigh_sysctl_unregister(idev->arp_parms);
2388         return err;
2389 }
2390
2391 static void devinet_sysctl_unregister(struct in_device *idev)
2392 {
2393         struct net *net = dev_net(idev->dev);
2394
2395         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2396         neigh_sysctl_unregister(idev->arp_parms);
2397 }
2398
2399 static struct ctl_table ctl_forward_entry[] = {
2400         {
2401                 .procname       = "ip_forward",
2402                 .data           = &ipv4_devconf.data[
2403                                         IPV4_DEVCONF_FORWARDING - 1],
2404                 .maxlen         = sizeof(int),
2405                 .mode           = 0644,
2406                 .proc_handler   = devinet_sysctl_forward,
2407                 .extra1         = &ipv4_devconf,
2408                 .extra2         = &init_net,
2409         },
2410         { },
2411 };
2412 #endif
2413
2414 static __net_init int devinet_init_net(struct net *net)
2415 {
2416         int err;
2417         struct ipv4_devconf *all, *dflt;
2418 #ifdef CONFIG_SYSCTL
2419         struct ctl_table *tbl = ctl_forward_entry;
2420         struct ctl_table_header *forw_hdr;
2421 #endif
2422
2423         err = -ENOMEM;
2424         all = &ipv4_devconf;
2425         dflt = &ipv4_devconf_dflt;
2426
2427         if (!net_eq(net, &init_net)) {
2428                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2429                 if (!all)
2430                         goto err_alloc_all;
2431
2432                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2433                 if (!dflt)
2434                         goto err_alloc_dflt;
2435
2436 #ifdef CONFIG_SYSCTL
2437                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2438                 if (!tbl)
2439                         goto err_alloc_ctl;
2440
2441                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2442                 tbl[0].extra1 = all;
2443                 tbl[0].extra2 = net;
2444 #endif
2445         }
2446
2447 #ifdef CONFIG_SYSCTL
2448         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2449         if (err < 0)
2450                 goto err_reg_all;
2451
2452         err = __devinet_sysctl_register(net, "default",
2453                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2454         if (err < 0)
2455                 goto err_reg_dflt;
2456
2457         err = -ENOMEM;
2458         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2459         if (!forw_hdr)
2460                 goto err_reg_ctl;
2461         net->ipv4.forw_hdr = forw_hdr;
2462 #endif
2463
2464         net->ipv4.devconf_all = all;
2465         net->ipv4.devconf_dflt = dflt;
2466         return 0;
2467
2468 #ifdef CONFIG_SYSCTL
2469 err_reg_ctl:
2470         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2471 err_reg_dflt:
2472         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2473 err_reg_all:
2474         if (tbl != ctl_forward_entry)
2475                 kfree(tbl);
2476 err_alloc_ctl:
2477 #endif
2478         if (dflt != &ipv4_devconf_dflt)
2479                 kfree(dflt);
2480 err_alloc_dflt:
2481         if (all != &ipv4_devconf)
2482                 kfree(all);
2483 err_alloc_all:
2484         return err;
2485 }
2486
2487 static __net_exit void devinet_exit_net(struct net *net)
2488 {
2489 #ifdef CONFIG_SYSCTL
2490         struct ctl_table *tbl;
2491
2492         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2493         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2494         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2495                                     NETCONFA_IFINDEX_DEFAULT);
2496         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2497                                     NETCONFA_IFINDEX_ALL);
2498         kfree(tbl);
2499 #endif
2500         kfree(net->ipv4.devconf_dflt);
2501         kfree(net->ipv4.devconf_all);
2502 }
2503
2504 static __net_initdata struct pernet_operations devinet_ops = {
2505         .init = devinet_init_net,
2506         .exit = devinet_exit_net,
2507 };
2508
2509 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2510         .family           = AF_INET,
2511         .fill_link_af     = inet_fill_link_af,
2512         .get_link_af_size = inet_get_link_af_size,
2513         .validate_link_af = inet_validate_link_af,
2514         .set_link_af      = inet_set_link_af,
2515 };
2516
2517 void __init devinet_init(void)
2518 {
2519         int i;
2520
2521         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2522                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2523
2524         register_pernet_subsys(&devinet_ops);
2525
2526         register_gifconf(PF_INET, inet_gifconf);
2527         register_netdevice_notifier(&ip_netdev_notifier);
2528
2529         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2530
2531         rtnl_af_register(&inet_af_ops);
2532
2533         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2534         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2535         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2536         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2537                       inet_netconf_dump_devconf, 0);
2538 }