GNU Linux-libre 6.1.90-gnu
[releases.git] / net / ipv4 / devinet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      NET3    IP device support routines.
4  *
5  *      Derived from the IP parts of dev.c 1.0.19
6  *              Authors:        Ross Biro
7  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
9  *
10  *      Additional Authors:
11  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
12  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13  *
14  *      Changes:
15  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
16  *                                      lists.
17  *              Cyrus Durgin:           updated for kmod
18  *              Matthias Andree:        in devinet_ioctl, compare label and
19  *                                      address (4.4BSD alias style support),
20  *                                      fall back to comparing just the label
21  *                                      if no match found.
22  */
23
24
25 #include <linux/uaccess.h>
26 #include <linux/bitops.h>
27 #include <linux/capability.h>
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/kernel.h>
31 #include <linux/sched/signal.h>
32 #include <linux/string.h>
33 #include <linux/mm.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/in.h>
37 #include <linux/errno.h>
38 #include <linux/interrupt.h>
39 #include <linux/if_addr.h>
40 #include <linux/if_ether.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/etherdevice.h>
44 #include <linux/skbuff.h>
45 #include <linux/init.h>
46 #include <linux/notifier.h>
47 #include <linux/inetdevice.h>
48 #include <linux/igmp.h>
49 #include <linux/slab.h>
50 #include <linux/hash.h>
51 #ifdef CONFIG_SYSCTL
52 #include <linux/sysctl.h>
53 #endif
54 #include <linux/kmod.h>
55 #include <linux/netconf.h>
56
57 #include <net/arp.h>
58 #include <net/ip.h>
59 #include <net/route.h>
60 #include <net/ip_fib.h>
61 #include <net/rtnetlink.h>
62 #include <net/net_namespace.h>
63 #include <net/addrconf.h>
64
/*
 * Address flags that are only meaningful for IPv6.  __inet_insert_ifa()
 * masks them off every IPv4 address before linking it in.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_TENTATIVE | IFA_F_OPTIMISTIC | IFA_F_NODAD | \
		 IFA_F_DADFAILED | IFA_F_HOMEADDRESS | \
		 IFA_F_STABLE_PRIVACY | IFA_F_MANAGETEMPADDR)
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79         },
80 };
81
82 static struct ipv4_devconf ipv4_devconf_dflt = {
83         .data = {
84                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91                 [IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92         },
93 };
94
/*
 * Access attribute @attr in the per-namespace default devconf of @net.
 *
 * @net is parenthesized so that any pointer-valued expression can be passed,
 * not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
97
98 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99         [IFA_LOCAL]             = { .type = NLA_U32 },
100         [IFA_ADDRESS]           = { .type = NLA_U32 },
101         [IFA_BROADCAST]         = { .type = NLA_U32 },
102         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
104         [IFA_FLAGS]             = { .type = NLA_U32 },
105         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
106         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
107         [IFA_PROTO]             = { .type = NLA_U8 },
108 };
109
110 struct inet_fill_args {
111         u32 portid;
112         u32 seq;
113         int event;
114         unsigned int flags;
115         int netnsid;
116         int ifindex;
117 };
118
119 #define IN4_ADDR_HSIZE_SHIFT    8
120 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
121
122 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124 static u32 inet_addr_hash(const struct net *net, __be32 addr)
125 {
126         u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129 }
130
131 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132 {
133         u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135         ASSERT_RTNL();
136         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137 }
138
139 static void inet_hash_remove(struct in_ifaddr *ifa)
140 {
141         ASSERT_RTNL();
142         hlist_del_init_rcu(&ifa->hash);
143 }
144
145 /**
146  * __ip_dev_find - find the first device with a given source address.
147  * @net: the net namespace
148  * @addr: the source address
149  * @devref: if true, take a reference on the found device
150  *
151  * If a caller uses devref=false, it should be protected by RCU, or RTNL
152  */
153 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154 {
155         struct net_device *result = NULL;
156         struct in_ifaddr *ifa;
157
158         rcu_read_lock();
159         ifa = inet_lookup_ifaddr_rcu(net, addr);
160         if (!ifa) {
161                 struct flowi4 fl4 = { .daddr = addr };
162                 struct fib_result res = { 0 };
163                 struct fib_table *local;
164
165                 /* Fallback to FIB local table so that communication
166                  * over loopback subnets work.
167                  */
168                 local = fib_get_table(net, RT_TABLE_LOCAL);
169                 if (local &&
170                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171                     res.type == RTN_LOCAL)
172                         result = FIB_RES_DEV(res);
173         } else {
174                 result = ifa->ifa_dev->dev;
175         }
176         if (result && devref)
177                 dev_hold(result);
178         rcu_read_unlock();
179         return result;
180 }
181 EXPORT_SYMBOL(__ip_dev_find);
182
183 /* called under RCU lock */
184 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185 {
186         u32 hash = inet_addr_hash(net, addr);
187         struct in_ifaddr *ifa;
188
189         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190                 if (ifa->ifa_local == addr &&
191                     net_eq(dev_net(ifa->ifa_dev->dev), net))
192                         return ifa;
193
194         return NULL;
195 }
196
197 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201 static void inet_del_ifa(struct in_device *in_dev,
202                          struct in_ifaddr __rcu **ifap,
203                          int destroy);
204 #ifdef CONFIG_SYSCTL
205 static int devinet_sysctl_register(struct in_device *idev);
206 static void devinet_sysctl_unregister(struct in_device *idev);
207 #else
208 static int devinet_sysctl_register(struct in_device *idev)
209 {
210         return 0;
211 }
212 static void devinet_sysctl_unregister(struct in_device *idev)
213 {
214 }
215 #endif
216
217 /* Locks all the inet devices. */
218
219 static struct in_ifaddr *inet_alloc_ifa(void)
220 {
221         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222 }
223
224 static void inet_rcu_free_ifa(struct rcu_head *head)
225 {
226         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227         if (ifa->ifa_dev)
228                 in_dev_put(ifa->ifa_dev);
229         kfree(ifa);
230 }
231
232 static void inet_free_ifa(struct in_ifaddr *ifa)
233 {
234         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
235 }
236
237 void in_dev_finish_destroy(struct in_device *idev)
238 {
239         struct net_device *dev = idev->dev;
240
241         WARN_ON(idev->ifa_list);
242         WARN_ON(idev->mc_list);
243         kfree(rcu_dereference_protected(idev->mc_hash, 1));
244 #ifdef NET_REFCNT_DEBUG
245         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
246 #endif
247         netdev_put(dev, &idev->dev_tracker);
248         if (!idev->dead)
249                 pr_err("Freeing alive in_device %p\n", idev);
250         else
251                 kfree(idev);
252 }
253 EXPORT_SYMBOL(in_dev_finish_destroy);
254
255 static struct in_device *inetdev_init(struct net_device *dev)
256 {
257         struct in_device *in_dev;
258         int err = -ENOMEM;
259
260         ASSERT_RTNL();
261
262         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
263         if (!in_dev)
264                 goto out;
265         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
266                         sizeof(in_dev->cnf));
267         in_dev->cnf.sysctl = NULL;
268         in_dev->dev = dev;
269         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
270         if (!in_dev->arp_parms)
271                 goto out_kfree;
272         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
273                 dev_disable_lro(dev);
274         /* Reference in_dev->dev */
275         netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
276         /* Account for reference dev->ip_ptr (below) */
277         refcount_set(&in_dev->refcnt, 1);
278
279         err = devinet_sysctl_register(in_dev);
280         if (err) {
281                 in_dev->dead = 1;
282                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
283                 in_dev_put(in_dev);
284                 in_dev = NULL;
285                 goto out;
286         }
287         ip_mc_init_dev(in_dev);
288         if (dev->flags & IFF_UP)
289                 ip_mc_up(in_dev);
290
291         /* we can receive as soon as ip_ptr is set -- do this last */
292         rcu_assign_pointer(dev->ip_ptr, in_dev);
293 out:
294         return in_dev ?: ERR_PTR(err);
295 out_kfree:
296         kfree(in_dev);
297         in_dev = NULL;
298         goto out;
299 }
300
301 static void in_dev_rcu_put(struct rcu_head *head)
302 {
303         struct in_device *idev = container_of(head, struct in_device, rcu_head);
304         in_dev_put(idev);
305 }
306
307 static void inetdev_destroy(struct in_device *in_dev)
308 {
309         struct net_device *dev;
310         struct in_ifaddr *ifa;
311
312         ASSERT_RTNL();
313
314         dev = in_dev->dev;
315
316         in_dev->dead = 1;
317
318         ip_mc_destroy_dev(in_dev);
319
320         while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
321                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
322                 inet_free_ifa(ifa);
323         }
324
325         RCU_INIT_POINTER(dev->ip_ptr, NULL);
326
327         devinet_sysctl_unregister(in_dev);
328         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
329         arp_ifdown(dev);
330
331         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
332 }
333
334 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
335 {
336         const struct in_ifaddr *ifa;
337
338         rcu_read_lock();
339         in_dev_for_each_ifa_rcu(ifa, in_dev) {
340                 if (inet_ifa_match(a, ifa)) {
341                         if (!b || inet_ifa_match(b, ifa)) {
342                                 rcu_read_unlock();
343                                 return 1;
344                         }
345                 }
346         }
347         rcu_read_unlock();
348         return 0;
349 }
350
351 static void __inet_del_ifa(struct in_device *in_dev,
352                            struct in_ifaddr __rcu **ifap,
353                            int destroy, struct nlmsghdr *nlh, u32 portid)
354 {
355         struct in_ifaddr *promote = NULL;
356         struct in_ifaddr *ifa, *ifa1;
357         struct in_ifaddr __rcu **last_prim;
358         struct in_ifaddr *prev_prom = NULL;
359         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
360
361         ASSERT_RTNL();
362
363         ifa1 = rtnl_dereference(*ifap);
364         last_prim = ifap;
365         if (in_dev->dead)
366                 goto no_promotions;
367
368         /* 1. Deleting primary ifaddr forces deletion all secondaries
369          * unless alias promotion is set
370          **/
371
372         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
373                 struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
374
375                 while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
376                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
377                             ifa1->ifa_scope <= ifa->ifa_scope)
378                                 last_prim = &ifa->ifa_next;
379
380                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
381                             ifa1->ifa_mask != ifa->ifa_mask ||
382                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
383                                 ifap1 = &ifa->ifa_next;
384                                 prev_prom = ifa;
385                                 continue;
386                         }
387
388                         if (!do_promote) {
389                                 inet_hash_remove(ifa);
390                                 *ifap1 = ifa->ifa_next;
391
392                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
393                                 blocking_notifier_call_chain(&inetaddr_chain,
394                                                 NETDEV_DOWN, ifa);
395                                 inet_free_ifa(ifa);
396                         } else {
397                                 promote = ifa;
398                                 break;
399                         }
400                 }
401         }
402
403         /* On promotion all secondaries from subnet are changing
404          * the primary IP, we must remove all their routes silently
405          * and later to add them back with new prefsrc. Do this
406          * while all addresses are on the device list.
407          */
408         for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
409                 if (ifa1->ifa_mask == ifa->ifa_mask &&
410                     inet_ifa_match(ifa1->ifa_address, ifa))
411                         fib_del_ifaddr(ifa, ifa1);
412         }
413
414 no_promotions:
415         /* 2. Unlink it */
416
417         *ifap = ifa1->ifa_next;
418         inet_hash_remove(ifa1);
419
420         /* 3. Announce address deletion */
421
422         /* Send message first, then call notifier.
423            At first sight, FIB update triggered by notifier
424            will refer to already deleted ifaddr, that could confuse
425            netlink listeners. It is not true: look, gated sees
426            that route deleted and if it still thinks that ifaddr
427            is valid, it will try to restore deleted routes... Grr.
428            So that, this order is correct.
429          */
430         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
431         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
432
433         if (promote) {
434                 struct in_ifaddr *next_sec;
435
436                 next_sec = rtnl_dereference(promote->ifa_next);
437                 if (prev_prom) {
438                         struct in_ifaddr *last_sec;
439
440                         rcu_assign_pointer(prev_prom->ifa_next, next_sec);
441
442                         last_sec = rtnl_dereference(*last_prim);
443                         rcu_assign_pointer(promote->ifa_next, last_sec);
444                         rcu_assign_pointer(*last_prim, promote);
445                 }
446
447                 promote->ifa_flags &= ~IFA_F_SECONDARY;
448                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
449                 blocking_notifier_call_chain(&inetaddr_chain,
450                                 NETDEV_UP, promote);
451                 for (ifa = next_sec; ifa;
452                      ifa = rtnl_dereference(ifa->ifa_next)) {
453                         if (ifa1->ifa_mask != ifa->ifa_mask ||
454                             !inet_ifa_match(ifa1->ifa_address, ifa))
455                                         continue;
456                         fib_add_ifaddr(ifa);
457                 }
458
459         }
460         if (destroy)
461                 inet_free_ifa(ifa1);
462 }
463
464 static void inet_del_ifa(struct in_device *in_dev,
465                          struct in_ifaddr __rcu **ifap,
466                          int destroy)
467 {
468         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
469 }
470
/* Address lifetime scan; the handler is defined further down. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
474
475 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
476                              u32 portid, struct netlink_ext_ack *extack)
477 {
478         struct in_ifaddr __rcu **last_primary, **ifap;
479         struct in_device *in_dev = ifa->ifa_dev;
480         struct in_validator_info ivi;
481         struct in_ifaddr *ifa1;
482         int ret;
483
484         ASSERT_RTNL();
485
486         if (!ifa->ifa_local) {
487                 inet_free_ifa(ifa);
488                 return 0;
489         }
490
491         ifa->ifa_flags &= ~IFA_F_SECONDARY;
492         last_primary = &in_dev->ifa_list;
493
494         /* Don't set IPv6 only flags to IPv4 addresses */
495         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
496
497         ifap = &in_dev->ifa_list;
498         ifa1 = rtnl_dereference(*ifap);
499
500         while (ifa1) {
501                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
502                     ifa->ifa_scope <= ifa1->ifa_scope)
503                         last_primary = &ifa1->ifa_next;
504                 if (ifa1->ifa_mask == ifa->ifa_mask &&
505                     inet_ifa_match(ifa1->ifa_address, ifa)) {
506                         if (ifa1->ifa_local == ifa->ifa_local) {
507                                 inet_free_ifa(ifa);
508                                 return -EEXIST;
509                         }
510                         if (ifa1->ifa_scope != ifa->ifa_scope) {
511                                 inet_free_ifa(ifa);
512                                 return -EINVAL;
513                         }
514                         ifa->ifa_flags |= IFA_F_SECONDARY;
515                 }
516
517                 ifap = &ifa1->ifa_next;
518                 ifa1 = rtnl_dereference(*ifap);
519         }
520
521         /* Allow any devices that wish to register ifaddr validtors to weigh
522          * in now, before changes are committed.  The rntl lock is serializing
523          * access here, so the state should not change between a validator call
524          * and a final notify on commit.  This isn't invoked on promotion under
525          * the assumption that validators are checking the address itself, and
526          * not the flags.
527          */
528         ivi.ivi_addr = ifa->ifa_address;
529         ivi.ivi_dev = ifa->ifa_dev;
530         ivi.extack = extack;
531         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
532                                            NETDEV_UP, &ivi);
533         ret = notifier_to_errno(ret);
534         if (ret) {
535                 inet_free_ifa(ifa);
536                 return ret;
537         }
538
539         if (!(ifa->ifa_flags & IFA_F_SECONDARY))
540                 ifap = last_primary;
541
542         rcu_assign_pointer(ifa->ifa_next, *ifap);
543         rcu_assign_pointer(*ifap, ifa);
544
545         inet_hash_insert(dev_net(in_dev->dev), ifa);
546
547         cancel_delayed_work(&check_lifetime_work);
548         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
549
550         /* Send message first, then call notifier.
551            Notifier will trigger FIB update, so that
552            listeners of netlink will know about new ifaddr */
553         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
554         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
555
556         return 0;
557 }
558
559 static int inet_insert_ifa(struct in_ifaddr *ifa)
560 {
561         return __inet_insert_ifa(ifa, NULL, 0, NULL);
562 }
563
564 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
565 {
566         struct in_device *in_dev = __in_dev_get_rtnl(dev);
567
568         ASSERT_RTNL();
569
570         if (!in_dev) {
571                 inet_free_ifa(ifa);
572                 return -ENOBUFS;
573         }
574         ipv4_devconf_setall(in_dev);
575         neigh_parms_data_state_setall(in_dev->arp_parms);
576         if (ifa->ifa_dev != in_dev) {
577                 WARN_ON(ifa->ifa_dev);
578                 in_dev_hold(in_dev);
579                 ifa->ifa_dev = in_dev;
580         }
581         if (ipv4_is_loopback(ifa->ifa_local))
582                 ifa->ifa_scope = RT_SCOPE_HOST;
583         return inet_insert_ifa(ifa);
584 }
585
586 /* Caller must hold RCU or RTNL :
587  * We dont take a reference on found in_device
588  */
589 struct in_device *inetdev_by_index(struct net *net, int ifindex)
590 {
591         struct net_device *dev;
592         struct in_device *in_dev = NULL;
593
594         rcu_read_lock();
595         dev = dev_get_by_index_rcu(net, ifindex);
596         if (dev)
597                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
598         rcu_read_unlock();
599         return in_dev;
600 }
601 EXPORT_SYMBOL(inetdev_by_index);
602
603 /* Called only from RTNL semaphored context. No locks. */
604
605 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
606                                     __be32 mask)
607 {
608         struct in_ifaddr *ifa;
609
610         ASSERT_RTNL();
611
612         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
613                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
614                         return ifa;
615         }
616         return NULL;
617 }
618
619 static int ip_mc_autojoin_config(struct net *net, bool join,
620                                  const struct in_ifaddr *ifa)
621 {
622 #if defined(CONFIG_IP_MULTICAST)
623         struct ip_mreqn mreq = {
624                 .imr_multiaddr.s_addr = ifa->ifa_address,
625                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
626         };
627         struct sock *sk = net->ipv4.mc_autojoin_sk;
628         int ret;
629
630         ASSERT_RTNL();
631
632         lock_sock(sk);
633         if (join)
634                 ret = ip_mc_join_group(sk, &mreq);
635         else
636                 ret = ip_mc_leave_group(sk, &mreq);
637         release_sock(sk);
638
639         return ret;
640 #else
641         return -EOPNOTSUPP;
642 #endif
643 }
644
645 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
646                             struct netlink_ext_ack *extack)
647 {
648         struct net *net = sock_net(skb->sk);
649         struct in_ifaddr __rcu **ifap;
650         struct nlattr *tb[IFA_MAX+1];
651         struct in_device *in_dev;
652         struct ifaddrmsg *ifm;
653         struct in_ifaddr *ifa;
654         int err;
655
656         ASSERT_RTNL();
657
658         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
659                                      ifa_ipv4_policy, extack);
660         if (err < 0)
661                 goto errout;
662
663         ifm = nlmsg_data(nlh);
664         in_dev = inetdev_by_index(net, ifm->ifa_index);
665         if (!in_dev) {
666                 err = -ENODEV;
667                 goto errout;
668         }
669
670         for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
671              ifap = &ifa->ifa_next) {
672                 if (tb[IFA_LOCAL] &&
673                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
674                         continue;
675
676                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
677                         continue;
678
679                 if (tb[IFA_ADDRESS] &&
680                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
681                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
682                         continue;
683
684                 if (ipv4_is_multicast(ifa->ifa_address))
685                         ip_mc_autojoin_config(net, false, ifa);
686                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
687                 return 0;
688         }
689
690         err = -EADDRNOTAVAIL;
691 errout:
692         return err;
693 }
694
695 #define INFINITY_LIFE_TIME      0xFFFFFFFF
696
697 static void check_lifetime(struct work_struct *work)
698 {
699         unsigned long now, next, next_sec, next_sched;
700         struct in_ifaddr *ifa;
701         struct hlist_node *n;
702         int i;
703
704         now = jiffies;
705         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
706
707         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
708                 bool change_needed = false;
709
710                 rcu_read_lock();
711                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
712                         unsigned long age;
713
714                         if (ifa->ifa_flags & IFA_F_PERMANENT)
715                                 continue;
716
717                         /* We try to batch several events at once. */
718                         age = (now - ifa->ifa_tstamp +
719                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
720
721                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
722                             age >= ifa->ifa_valid_lft) {
723                                 change_needed = true;
724                         } else if (ifa->ifa_preferred_lft ==
725                                    INFINITY_LIFE_TIME) {
726                                 continue;
727                         } else if (age >= ifa->ifa_preferred_lft) {
728                                 if (time_before(ifa->ifa_tstamp +
729                                                 ifa->ifa_valid_lft * HZ, next))
730                                         next = ifa->ifa_tstamp +
731                                                ifa->ifa_valid_lft * HZ;
732
733                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
734                                         change_needed = true;
735                         } else if (time_before(ifa->ifa_tstamp +
736                                                ifa->ifa_preferred_lft * HZ,
737                                                next)) {
738                                 next = ifa->ifa_tstamp +
739                                        ifa->ifa_preferred_lft * HZ;
740                         }
741                 }
742                 rcu_read_unlock();
743                 if (!change_needed)
744                         continue;
745                 rtnl_lock();
746                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
747                         unsigned long age;
748
749                         if (ifa->ifa_flags & IFA_F_PERMANENT)
750                                 continue;
751
752                         /* We try to batch several events at once. */
753                         age = (now - ifa->ifa_tstamp +
754                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
755
756                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
757                             age >= ifa->ifa_valid_lft) {
758                                 struct in_ifaddr __rcu **ifap;
759                                 struct in_ifaddr *tmp;
760
761                                 ifap = &ifa->ifa_dev->ifa_list;
762                                 tmp = rtnl_dereference(*ifap);
763                                 while (tmp) {
764                                         if (tmp == ifa) {
765                                                 inet_del_ifa(ifa->ifa_dev,
766                                                              ifap, 1);
767                                                 break;
768                                         }
769                                         ifap = &tmp->ifa_next;
770                                         tmp = rtnl_dereference(*ifap);
771                                 }
772                         } else if (ifa->ifa_preferred_lft !=
773                                    INFINITY_LIFE_TIME &&
774                                    age >= ifa->ifa_preferred_lft &&
775                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
776                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
777                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
778                         }
779                 }
780                 rtnl_unlock();
781         }
782
783         next_sec = round_jiffies_up(next);
784         next_sched = next;
785
786         /* If rounded timeout is accurate enough, accept it. */
787         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
788                 next_sched = next_sec;
789
790         now = jiffies;
791         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
792         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
793                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
794
795         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
796                         next_sched - now);
797 }
798
799 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
800                              __u32 prefered_lft)
801 {
802         unsigned long timeout;
803
804         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
805
806         timeout = addrconf_timeout_fixup(valid_lft, HZ);
807         if (addrconf_finite_timeout(timeout))
808                 ifa->ifa_valid_lft = timeout;
809         else
810                 ifa->ifa_flags |= IFA_F_PERMANENT;
811
812         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
813         if (addrconf_finite_timeout(timeout)) {
814                 if (timeout == 0)
815                         ifa->ifa_flags |= IFA_F_DEPRECATED;
816                 ifa->ifa_preferred_lft = timeout;
817         }
818         ifa->ifa_tstamp = jiffies;
819         if (!ifa->ifa_cstamp)
820                 ifa->ifa_cstamp = ifa->ifa_tstamp;
821 }
822
823 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
824                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
825                                        struct netlink_ext_ack *extack)
826 {
827         struct nlattr *tb[IFA_MAX+1];
828         struct in_ifaddr *ifa;
829         struct ifaddrmsg *ifm;
830         struct net_device *dev;
831         struct in_device *in_dev;
832         int err;
833
834         err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
835                                      ifa_ipv4_policy, extack);
836         if (err < 0)
837                 goto errout;
838
839         ifm = nlmsg_data(nlh);
840         err = -EINVAL;
841         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
842                 goto errout;
843
844         dev = __dev_get_by_index(net, ifm->ifa_index);
845         err = -ENODEV;
846         if (!dev)
847                 goto errout;
848
849         in_dev = __in_dev_get_rtnl(dev);
850         err = -ENOBUFS;
851         if (!in_dev)
852                 goto errout;
853
854         ifa = inet_alloc_ifa();
855         if (!ifa)
856                 /*
857                  * A potential indev allocation can be left alive, it stays
858                  * assigned to its device and is destroy with it.
859                  */
860                 goto errout;
861
862         ipv4_devconf_setall(in_dev);
863         neigh_parms_data_state_setall(in_dev->arp_parms);
864         in_dev_hold(in_dev);
865
866         if (!tb[IFA_ADDRESS])
867                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
868
869         INIT_HLIST_NODE(&ifa->hash);
870         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
871         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
872         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
873                                          ifm->ifa_flags;
874         ifa->ifa_scope = ifm->ifa_scope;
875         ifa->ifa_dev = in_dev;
876
877         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
878         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
879
880         if (tb[IFA_BROADCAST])
881                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
882
883         if (tb[IFA_LABEL])
884                 nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
885         else
886                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
887
888         if (tb[IFA_RT_PRIORITY])
889                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
890
891         if (tb[IFA_PROTO])
892                 ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
893
894         if (tb[IFA_CACHEINFO]) {
895                 struct ifa_cacheinfo *ci;
896
897                 ci = nla_data(tb[IFA_CACHEINFO]);
898                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
899                         err = -EINVAL;
900                         goto errout_free;
901                 }
902                 *pvalid_lft = ci->ifa_valid;
903                 *pprefered_lft = ci->ifa_prefered;
904         }
905
906         return ifa;
907
908 errout_free:
909         inet_free_ifa(ifa);
910 errout:
911         return ERR_PTR(err);
912 }
913
914 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
915 {
916         struct in_device *in_dev = ifa->ifa_dev;
917         struct in_ifaddr *ifa1;
918
919         if (!ifa->ifa_local)
920                 return NULL;
921
922         in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
923                 if (ifa1->ifa_mask == ifa->ifa_mask &&
924                     inet_ifa_match(ifa1->ifa_address, ifa) &&
925                     ifa1->ifa_local == ifa->ifa_local)
926                         return ifa1;
927         }
928         return NULL;
929 }
930
931 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
932                             struct netlink_ext_ack *extack)
933 {
934         struct net *net = sock_net(skb->sk);
935         struct in_ifaddr *ifa;
936         struct in_ifaddr *ifa_existing;
937         __u32 valid_lft = INFINITY_LIFE_TIME;
938         __u32 prefered_lft = INFINITY_LIFE_TIME;
939
940         ASSERT_RTNL();
941
942         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
943         if (IS_ERR(ifa))
944                 return PTR_ERR(ifa);
945
946         ifa_existing = find_matching_ifa(ifa);
947         if (!ifa_existing) {
948                 /* It would be best to check for !NLM_F_CREATE here but
949                  * userspace already relies on not having to provide this.
950                  */
951                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
952                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
953                         int ret = ip_mc_autojoin_config(net, true, ifa);
954
955                         if (ret < 0) {
956                                 inet_free_ifa(ifa);
957                                 return ret;
958                         }
959                 }
960                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
961                                          extack);
962         } else {
963                 u32 new_metric = ifa->ifa_rt_priority;
964
965                 inet_free_ifa(ifa);
966
967                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
968                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
969                         return -EEXIST;
970                 ifa = ifa_existing;
971
972                 if (ifa->ifa_rt_priority != new_metric) {
973                         fib_modify_prefix_metric(ifa, new_metric);
974                         ifa->ifa_rt_priority = new_metric;
975                 }
976
977                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
978                 cancel_delayed_work(&check_lifetime_work);
979                 queue_delayed_work(system_power_efficient_wq,
980                                 &check_lifetime_work, 0);
981                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
982         }
983         return 0;
984 }
985
986 /*
987  *      Determine a default network mask, based on the IP address.
988  */
989
990 static int inet_abc_len(__be32 addr)
991 {
992         int rc = -1;    /* Something else, probably a multicast. */
993
994         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
995                 rc = 0;
996         else {
997                 __u32 haddr = ntohl(addr);
998                 if (IN_CLASSA(haddr))
999                         rc = 8;
1000                 else if (IN_CLASSB(haddr))
1001                         rc = 16;
1002                 else if (IN_CLASSC(haddr))
1003                         rc = 24;
1004                 else if (IN_CLASSE(haddr))
1005                         rc = 32;
1006         }
1007
1008         return rc;
1009 }
1010
1011
/*
 * Handle the IPv4 interface-address ioctls for the interface named in @ifr:
 * SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK (read one
 * field of an address into ifr->ifr_addr) and SIOCSIFADDR, SIOCSIFBRDADDR,
 * SIOCSIFDSTADDR, SIOCSIFNETMASK, SIOCSIFFLAGS (modify).
 *
 * All modifying commands require CAP_NET_ADMIN in net->user_ns, and the
 * address-setting ones additionally require sin_family == AF_INET.  The
 * device lookup and everything after it run under rtnl_lock().
 *
 * The address to operate on is selected by label (ifr_name, which may carry
 * a ":alias" suffix); for the get commands, when the caller passed an
 * AF_INET address, a label + local-address match is tried first.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EINVAL, -ENODEV,
 * -EADDRNOTAVAIL, -ENOBUFS, or whatever dev_change_flags()/inet_set_ifa()
 * return.
 */
1012 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1013 {
1014         struct sockaddr_in sin_orig;
1015         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1016         struct in_ifaddr __rcu **ifap = NULL;
1017         struct in_device *in_dev;
1018         struct in_ifaddr *ifa = NULL;
1019         struct net_device *dev;
1020         char *colon;
1021         int ret = -EFAULT;
1022         int tryaddrmatch = 0;
1023
/* Force NUL termination so the string helpers below stay within ifr_name. */
1024         ifr->ifr_name[IFNAMSIZ - 1] = 0;
1025
1026         /* save original address for comparison */
1027         memcpy(&sin_orig, sin, sizeof(*sin));
1028
/*
 * Temporarily cut off a ":alias" suffix so that the bare device name is
 * used for dev_load() and the device lookup; the ':' is put back once the
 * device has been found.
 */
1029         colon = strchr(ifr->ifr_name, ':');
1030         if (colon)
1031                 *colon = 0;
1032
1033         dev_load(net, ifr->ifr_name);
1034
/* First pass over cmd: permission and argument checks only. */
1035         switch (cmd) {
1036         case SIOCGIFADDR:       /* Get interface address */
1037         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1038         case SIOCGIFDSTADDR:    /* Get the destination address */
1039         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1040                 /* Note that these ioctls will not sleep,
1041                    so that we do not impose a lock.
1042                    One day we will be forced to put shlock here (I mean SMP)
1043                  */
/* The caller's address was saved in sin_orig; the reply buffer starts clean. */
1044                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1045                 memset(sin, 0, sizeof(*sin));
1046                 sin->sin_family = AF_INET;
1047                 break;
1048
1049         case SIOCSIFFLAGS:
1050                 ret = -EPERM;
1051                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1052                         goto out;
1053                 break;
1054         case SIOCSIFADDR:       /* Set interface address (and family) */
1055         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1056         case SIOCSIFDSTADDR:    /* Set the destination address */
1057         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1058                 ret = -EPERM;
1059                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1060                         goto out;
1061                 ret = -EINVAL;
1062                 if (sin->sin_family != AF_INET)
1063                         goto out;
1064                 break;
1065         default:
1066                 ret = -EINVAL;
1067                 goto out;
1068         }
1069
1070         rtnl_lock();
1071
1072         ret = -ENODEV;
1073         dev = __dev_get_by_name(net, ifr->ifr_name);
1074         if (!dev)
1075                 goto done;
1076
/* Restore the full label ("dev:alias") for the label comparisons below. */
1077         if (colon)
1078                 *colon = ':';
1079
/*
 * Locate the address to work on.  On a match, ifa points at it and ifap at
 * the link that references it (needed by inet_del_ifa()); if a loop runs to
 * the end without a match, ifa is NULL.
 */
1080         in_dev = __in_dev_get_rtnl(dev);
1081         if (in_dev) {
1082                 if (tryaddrmatch) {
1083                         /* Matthias Andree */
1084                         /* compare label and address (4.4BSD style) */
1085                         /* note: we only do this for a limited set of ioctls
1086                            and only if the original address family was AF_INET.
1087                            This is checked above. */
1088
1089                         for (ifap = &in_dev->ifa_list;
1090                              (ifa = rtnl_dereference(*ifap)) != NULL;
1091                              ifap = &ifa->ifa_next) {
1092                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1093                                     sin_orig.sin_addr.s_addr ==
1094                                                         ifa->ifa_local) {
1095                                         break; /* found */
1096                                 }
1097                         }
1098                 }
1099                 /* we didn't get a match, maybe the application is
1100                    4.3BSD-style and passed in junk so we fall back to
1101                    comparing just the label */
1102                 if (!ifa) {
1103                         for (ifap = &in_dev->ifa_list;
1104                              (ifa = rtnl_dereference(*ifap)) != NULL;
1105                              ifap = &ifa->ifa_next)
1106                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1107                                         break;
1108                 }
1109         }
1110
/*
 * Only SIOCSIFADDR (which can create an address) and SIOCSIFFLAGS (which can
 * act on the device itself) may continue without an existing address.
 */
1111         ret = -EADDRNOTAVAIL;
1112         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1113                 goto done;
1114
/* Second pass over cmd: perform the operation. */
1115         switch (cmd) {
1116         case SIOCGIFADDR:       /* Get interface address */
1117                 ret = 0;
1118                 sin->sin_addr.s_addr = ifa->ifa_local;
1119                 break;
1120
1121         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1122                 ret = 0;
1123                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1124                 break;
1125
1126         case SIOCGIFDSTADDR:    /* Get the destination address */
1127                 ret = 0;
1128                 sin->sin_addr.s_addr = ifa->ifa_address;
1129                 break;
1130
1131         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1132                 ret = 0;
1133                 sin->sin_addr.s_addr = ifa->ifa_mask;
1134                 break;
1135
1136         case SIOCSIFFLAGS:
/*
 * With an alias label the flags apply to that alias only: clearing IFF_UP
 * deletes the aliased address, anything else is a no-op.  Without an alias
 * the request is passed on to dev_change_flags().
 */
1137                 if (colon) {
1138                         ret = -EADDRNOTAVAIL;
1139                         if (!ifa)
1140                                 break;
1141                         ret = 0;
1142                         if (!(ifr->ifr_flags & IFF_UP))
1143                                 inet_del_ifa(in_dev, ifap, 1);
1144                         break;
1145                 }
1146                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1147                 break;
1148
1149         case SIOCSIFADDR:       /* Set interface address (and family) */
/* Addresses for which inet_abc_len() has no default prefix are rejected. */
1150                 ret = -EINVAL;
1151                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1152                         break;
1153
1154                 if (!ifa) {
/* No address under this label yet: allocate one and label it. */
1155                         ret = -ENOBUFS;
1156                         ifa = inet_alloc_ifa();
1157                         if (!ifa)
1158                                 break;
1159                         INIT_HLIST_NODE(&ifa->hash);
1160                         if (colon)
1161                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1162                         else
1163                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1164                 } else {
/*
 * Same address again is a successful no-op.  Otherwise the existing entry
 * is unlinked, reset, and reused for the new address below.
 * NOTE(review): the last argument 0 to inet_del_ifa() presumably means
 * "do not destroy", since ifa is reused afterwards -- confirm.
 */
1165                         ret = 0;
1166                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1167                                 break;
1168                         inet_del_ifa(in_dev, ifap, 0);
1169                         ifa->ifa_broadcast = 0;
1170                         ifa->ifa_scope = 0;
1171                 }
1172
1173                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1174
/*
 * Non point-to-point devices get the classful default prefix, plus a
 * derived broadcast address on broadcast-capable devices when the prefix
 * is shorter than 31 bits; point-to-point devices get a /32.
 */
1175                 if (!(dev->flags & IFF_POINTOPOINT)) {
1176                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1177                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1178                         if ((dev->flags & IFF_BROADCAST) &&
1179                             ifa->ifa_prefixlen < 31)
1180                                 ifa->ifa_broadcast = ifa->ifa_address |
1181                                                      ~ifa->ifa_mask;
1182                 } else {
1183                         ifa->ifa_prefixlen = 32;
1184                         ifa->ifa_mask = inet_make_mask(32);
1185                 }
1186                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1187                 ret = inet_set_ifa(dev, ifa);
1188                 break;
1189
1190         case SIOCSIFBRDADDR:    /* Set the broadcast address */
/* Unlink, modify, re-insert; skipped entirely when nothing changes. */
1191                 ret = 0;
1192                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1193                         inet_del_ifa(in_dev, ifap, 0);
1194                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1195                         inet_insert_ifa(ifa);
1196                 }
1197                 break;
1198
1199         case SIOCSIFDSTADDR:    /* Set the destination address */
1200                 ret = 0;
1201                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1202                         break;
1203                 ret = -EINVAL;
1204                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1205                         break;
1206                 ret = 0;
1207                 inet_del_ifa(in_dev, ifap, 0);
1208                 ifa->ifa_address = sin->sin_addr.s_addr;
1209                 inet_insert_ifa(ifa);
1210                 break;
1211
1212         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1213
1214                 /*
1215                  *      The mask we set must be legal.
1216                  */
1217                 ret = -EINVAL;
1218                 if (bad_mask(sin->sin_addr.s_addr, 0))
1219                         break;
1220                 ret = 0;
1221                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1222                         __be32 old_mask = ifa->ifa_mask;
1223                         inet_del_ifa(in_dev, ifap, 0);
1224                         ifa->ifa_mask = sin->sin_addr.s_addr;
1225                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1226
1227                         /* See if current broadcast address matches
1228                          * with current netmask, then recalculate
1229                          * the broadcast address. Otherwise it's a
1230                          * funny address, so don't touch it since
1231                          * the user seems to know what (s)he's doing...
1232                          */
1233                         if ((dev->flags & IFF_BROADCAST) &&
1234                             (ifa->ifa_prefixlen < 31) &&
1235                             (ifa->ifa_broadcast ==
1236                              (ifa->ifa_local|~old_mask))) {
1237                                 ifa->ifa_broadcast = (ifa->ifa_local |
1238                                                       ~sin->sin_addr.s_addr);
1239                         }
1240                         inet_insert_ifa(ifa);
1241                 }
1242                 break;
1243         }
/* done: leave with RTNL released; out: exit taken before RTNL was acquired. */
1244 done:
1245         rtnl_unlock();
1246 out:
1247         return ret;
1248 }
1249
1250 int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1251 {
1252         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1253         const struct in_ifaddr *ifa;
1254         struct ifreq ifr;
1255         int done = 0;
1256
1257         if (WARN_ON(size > sizeof(struct ifreq)))
1258                 goto out;
1259
1260         if (!in_dev)
1261                 goto out;
1262
1263         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1264                 if (!buf) {
1265                         done += size;
1266                         continue;
1267                 }
1268                 if (len < size)
1269                         break;
1270                 memset(&ifr, 0, sizeof(struct ifreq));
1271                 strcpy(ifr.ifr_name, ifa->ifa_label);
1272
1273                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1274                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1275                                                                 ifa->ifa_local;
1276
1277                 if (copy_to_user(buf + done, &ifr, size)) {
1278                         done = -EFAULT;
1279                         break;
1280                 }
1281                 len  -= size;
1282                 done += size;
1283         }
1284 out:
1285         return done;
1286 }
1287
1288 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1289                                  int scope)
1290 {
1291         const struct in_ifaddr *ifa;
1292
1293         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1294                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1295                         continue;
1296                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1297                     ifa->ifa_scope <= scope)
1298                         return ifa->ifa_local;
1299         }
1300
1301         return 0;
1302 }
1303
1304 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1305 {
1306         const struct in_ifaddr *ifa;
1307         __be32 addr = 0;
1308         unsigned char localnet_scope = RT_SCOPE_HOST;
1309         struct in_device *in_dev;
1310         struct net *net = dev_net(dev);
1311         int master_idx;
1312
1313         rcu_read_lock();
1314         in_dev = __in_dev_get_rcu(dev);
1315         if (!in_dev)
1316                 goto no_in_dev;
1317
1318         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1319                 localnet_scope = RT_SCOPE_LINK;
1320
1321         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1322                 if (ifa->ifa_flags & IFA_F_SECONDARY)
1323                         continue;
1324                 if (min(ifa->ifa_scope, localnet_scope) > scope)
1325                         continue;
1326                 if (!dst || inet_ifa_match(dst, ifa)) {
1327                         addr = ifa->ifa_local;
1328                         break;
1329                 }
1330                 if (!addr)
1331                         addr = ifa->ifa_local;
1332         }
1333
1334         if (addr)
1335                 goto out_unlock;
1336 no_in_dev:
1337         master_idx = l3mdev_master_ifindex_rcu(dev);
1338
1339         /* For VRFs, the VRF device takes the place of the loopback device,
1340          * with addresses on it being preferred.  Note in such cases the
1341          * loopback device will be among the devices that fail the master_idx
1342          * equality check in the loop below.
1343          */
1344         if (master_idx &&
1345             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1346             (in_dev = __in_dev_get_rcu(dev))) {
1347                 addr = in_dev_select_addr(in_dev, scope);
1348                 if (addr)
1349                         goto out_unlock;
1350         }
1351
1352         /* Not loopback addresses on loopback should be preferred
1353            in this case. It is important that lo is the first interface
1354            in dev_base list.
1355          */
1356         for_each_netdev_rcu(net, dev) {
1357                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1358                         continue;
1359
1360                 in_dev = __in_dev_get_rcu(dev);
1361                 if (!in_dev)
1362                         continue;
1363
1364                 addr = in_dev_select_addr(in_dev, scope);
1365                 if (addr)
1366                         goto out_unlock;
1367         }
1368 out_unlock:
1369         rcu_read_unlock();
1370         return addr;
1371 }
1372 EXPORT_SYMBOL(inet_select_addr);
1373
/*
 * Check the addresses on @in_dev against the (@dst, @local, @scope) triple.
 *
 * Two things are tracked while walking the address list:
 *   addr - candidate local address: the first entry whose ifa_local equals
 *          @local (any entry if @local is 0) and whose effective scope is
 *          within @scope;
 *   same - whether some entry's subnet contains @local (if non-zero) and
 *          @dst (if non-zero), as decided by inet_ifa_match().
 *
 * The effective scope of an entry is min(ifa_scope, localnet_scope), where
 * localnet_scope is RT_SCOPE_LINK when IN_DEV_ROUTE_LOCALNET() is set and
 * RT_SCOPE_HOST otherwise.
 *
 * Returns the candidate address if a matching subnet was found, else 0.
 */
1374 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1375                               __be32 local, int scope)
1376 {
1377         unsigned char localnet_scope = RT_SCOPE_HOST;
1378         const struct in_ifaddr *ifa;
1379         __be32 addr = 0;
1380         int same = 0;
1381
1382         if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1383                 localnet_scope = RT_SCOPE_LINK;
1384
1385         in_dev_for_each_ifa_rcu(ifa, in_dev) {
1386                 unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1387
/* Step 1: pick a candidate address if none has been chosen yet. */
1388                 if (!addr &&
1389                     (local == ifa->ifa_local || !local) &&
1390                     min_scope <= scope) {
1391                         addr = ifa->ifa_local;
/* A subnet match was already seen on an earlier entry: both halves done. */
1392                         if (same)
1393                                 break;
1394                 }
/* Step 2: look for a subnet match if none has been found yet. */
1395                 if (!same) {
1396                         same = (!local || inet_ifa_match(local, ifa)) &&
1397                                 (!dst || inet_ifa_match(dst, ifa));
1398                         if (same && addr) {
/* With an explicit local address, or no dst constraint, this is enough. */
1399                                 if (local || !dst)
1400                                         break;
1401                                 /* Is the selected addr into dst subnet? */
1402                                 if (inet_ifa_match(addr, ifa))
1403                                         break;
1404                                 /* No, then can we use new local src? */
1405                                 if (min_scope <= scope) {
1406                                         addr = ifa->ifa_local;
1407                                         break;
1408                                 }
1409                                 /* search for large dst subnet for addr */
1410                                 same = 0;
1411                         }
1412                 }
1413         }
1414
1415         return same ? addr : 0;
1416 }
1417
1418 /*
1419  * Confirm that local IP address exists using wildcards:
1420  * - net: netns to check, cannot be NULL
1421  * - in_dev: only on this interface, NULL=any interface
1422  * - dst: only in the same subnet as dst, 0=any dst
1423  * - local: address, 0=autoselect the local address
1424  * - scope: maximum allowed scope value for the local address
1425  */
1426 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1427                          __be32 dst, __be32 local, int scope)
1428 {
1429         __be32 addr = 0;
1430         struct net_device *dev;
1431
1432         if (in_dev)
1433                 return confirm_addr_indev(in_dev, dst, local, scope);
1434
1435         rcu_read_lock();
1436         for_each_netdev_rcu(net, dev) {
1437                 in_dev = __in_dev_get_rcu(dev);
1438                 if (in_dev) {
1439                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1440                         if (addr)
1441                                 break;
1442                 }
1443         }
1444         rcu_read_unlock();
1445
1446         return addr;
1447 }
1448 EXPORT_SYMBOL(inet_confirm_addr);
1449
1450 /*
1451  *      Device notifier
1452  */
1453
1454 int register_inetaddr_notifier(struct notifier_block *nb)
1455 {
1456         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1457 }
1458 EXPORT_SYMBOL(register_inetaddr_notifier);
1459
1460 int unregister_inetaddr_notifier(struct notifier_block *nb)
1461 {
1462         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1463 }
1464 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1465
1466 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1467 {
1468         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1469 }
1470 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1471
1472 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1473 {
1474         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1475             nb);
1476 }
1477 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1478
1479 /* Rename ifa_labels for a device name change. Make some effort to preserve
1480  * existing alias numbering and to create unique labels if possible.
1481 */
1482 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1483 {
1484         struct in_ifaddr *ifa;
1485         int named = 0;
1486
1487         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1488                 char old[IFNAMSIZ], *dot;
1489
1490                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1491                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1492                 if (named++ == 0)
1493                         goto skip;
1494                 dot = strchr(old, ':');
1495                 if (!dot) {
1496                         sprintf(old, ":%d", named);
1497                         dot = old;
1498                 }
1499                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1500                         strcat(ifa->ifa_label, dot);
1501                 else
1502                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1503 skip:
1504                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1505         }
1506 }
1507
1508 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1509                                         struct in_device *in_dev)
1510
1511 {
1512         const struct in_ifaddr *ifa;
1513
1514         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1515                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1516                          ifa->ifa_local, dev,
1517                          ifa->ifa_local, NULL,
1518                          dev->dev_addr, NULL);
1519         }
1520 }
1521
1522 /* Called only under RTNL semaphore */
1523
/*
 * Netdevice notifier callback for the IPv4 per-device state (in_device).
 *
 * If the device has no in_device yet, one is created on NETDEV_REGISTER
 * (or on NETDEV_CHANGEMTU once the MTU is valid again) and nothing else is
 * done.  Otherwise the event is dispatched in the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is returned through
 * notifier_from_errno().
 */
1524 static int inetdev_event(struct notifier_block *this, unsigned long event,
1525                          void *ptr)
1526 {
1527         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1528         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1529
1530         ASSERT_RTNL();
1531
1532         if (!in_dev) {
1533                 if (event == NETDEV_REGISTER) {
1534                         in_dev = inetdev_init(dev);
1535                         if (IS_ERR(in_dev))
1536                                 return notifier_from_errno(PTR_ERR(in_dev));
/* Loopback devices get NOXFRM and NOPOLICY set by default. */
1537                         if (dev->flags & IFF_LOOPBACK) {
1538                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1539                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1540                         }
1541                 } else if (event == NETDEV_CHANGEMTU) {
1542                         /* Re-enabling IP */
/* The result of inetdev_init() is not checked on this path. */
1543                         if (inetdev_valid_mtu(dev->mtu))
1544                                 in_dev = inetdev_init(dev);
1545                 }
1546                 goto out;
1547         }
1548
1549         switch (event) {
1550         case NETDEV_REGISTER:
/* REGISTER for a device that already has an in_device is unexpected. */
1551                 pr_debug("%s: bug\n", __func__);
1552                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1553                 break;
1554         case NETDEV_UP:
1555                 if (!inetdev_valid_mtu(dev->mtu))
1556                         break;
/*
 * A loopback device coming up is given INADDR_LOOPBACK/8 with host scope
 * and infinite lifetimes.  Allocation failure is silently tolerated.
 */
1557                 if (dev->flags & IFF_LOOPBACK) {
1558                         struct in_ifaddr *ifa = inet_alloc_ifa();
1559
1560                         if (ifa) {
1561                                 INIT_HLIST_NODE(&ifa->hash);
1562                                 ifa->ifa_local =
1563                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1564                                 ifa->ifa_prefixlen = 8;
1565                                 ifa->ifa_mask = inet_make_mask(8);
1566                                 in_dev_hold(in_dev);
1567                                 ifa->ifa_dev = in_dev;
1568                                 ifa->ifa_scope = RT_SCOPE_HOST;
1569                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1570                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1571                                                  INFINITY_LIFE_TIME);
1572                                 ipv4_devconf_setall(in_dev);
1573                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1574                                 inet_insert_ifa(ifa);
1575                         }
1576                 }
1577                 ip_mc_up(in_dev);
1578                 fallthrough;
1579         case NETDEV_CHANGEADDR:
/* UP and CHANGEADDR only announce themselves when arp_notify is enabled. */
1580                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1581                         break;
1582                 fallthrough;
1583         case NETDEV_NOTIFY_PEERS:
1584                 /* Send gratuitous ARP to notify of link change */
1585                 inetdev_send_gratuitous_arp(dev, in_dev);
1586                 break;
1587         case NETDEV_DOWN:
1588                 ip_mc_down(in_dev);
1589                 break;
1590         case NETDEV_PRE_TYPE_CHANGE:
1591                 ip_mc_unmap(in_dev);
1592                 break;
1593         case NETDEV_POST_TYPE_CHANGE:
1594                 ip_mc_remap(in_dev);
1595                 break;
1596         case NETDEV_CHANGEMTU:
1597                 if (inetdev_valid_mtu(dev->mtu))
1598                         break;
1599                 /* disable IP when MTU is not enough */
1600                 fallthrough;
1601         case NETDEV_UNREGISTER:
1602                 inetdev_destroy(in_dev);
1603                 break;
1604         case NETDEV_CHANGENAME:
1605                 /* Do not notify about label change, this event is
1606                  * not interesting to applications using netlink.
1607                  */
/*
 * NOTE(review): inetdev_changename() in this file sends RTM_NEWADDR via
 * rtmsg_ifa() for every address, so the comment above may be stale --
 * confirm.
 */
1608                 inetdev_changename(dev, in_dev);
1609
/* Re-register the sysctl entries after the device name has changed. */
1610                 devinet_sysctl_unregister(in_dev);
1611                 devinet_sysctl_register(in_dev);
1612                 break;
1613         }
1614 out:
1615         return NOTIFY_DONE;
1616 }
1617
/* Notifier block whose callback is inetdev_event(). */
1618 static struct notifier_block ip_netdev_notifier = {
1619         .notifier_call = inetdev_event,
1620 };
1621
1622 static size_t inet_nlmsg_size(void)
1623 {
1624         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1625                + nla_total_size(4) /* IFA_ADDRESS */
1626                + nla_total_size(4) /* IFA_LOCAL */
1627                + nla_total_size(4) /* IFA_BROADCAST */
1628                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1629                + nla_total_size(4)  /* IFA_FLAGS */
1630                + nla_total_size(1)  /* IFA_PROTO */
1631                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1632                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1633 }
1634
1635 static inline u32 cstamp_delta(unsigned long cstamp)
1636 {
1637         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1638 }
1639
1640 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1641                          unsigned long tstamp, u32 preferred, u32 valid)
1642 {
1643         struct ifa_cacheinfo ci;
1644
1645         ci.cstamp = cstamp_delta(cstamp);
1646         ci.tstamp = cstamp_delta(tstamp);
1647         ci.ifa_prefered = preferred;
1648         ci.ifa_valid = valid;
1649
1650         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1651 }
1652
1653 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1654                             struct inet_fill_args *args)
1655 {
1656         struct ifaddrmsg *ifm;
1657         struct nlmsghdr  *nlh;
1658         u32 preferred, valid;
1659
1660         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1661                         args->flags);
1662         if (!nlh)
1663                 return -EMSGSIZE;
1664
1665         ifm = nlmsg_data(nlh);
1666         ifm->ifa_family = AF_INET;
1667         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1668         ifm->ifa_flags = ifa->ifa_flags;
1669         ifm->ifa_scope = ifa->ifa_scope;
1670         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1671
1672         if (args->netnsid >= 0 &&
1673             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1674                 goto nla_put_failure;
1675
1676         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1677                 preferred = ifa->ifa_preferred_lft;
1678                 valid = ifa->ifa_valid_lft;
1679                 if (preferred != INFINITY_LIFE_TIME) {
1680                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1681
1682                         if (preferred > tval)
1683                                 preferred -= tval;
1684                         else
1685                                 preferred = 0;
1686                         if (valid != INFINITY_LIFE_TIME) {
1687                                 if (valid > tval)
1688                                         valid -= tval;
1689                                 else
1690                                         valid = 0;
1691                         }
1692                 }
1693         } else {
1694                 preferred = INFINITY_LIFE_TIME;
1695                 valid = INFINITY_LIFE_TIME;
1696         }
1697         if ((ifa->ifa_address &&
1698              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1699             (ifa->ifa_local &&
1700              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1701             (ifa->ifa_broadcast &&
1702              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1703             (ifa->ifa_label[0] &&
1704              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1705             (ifa->ifa_proto &&
1706              nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1707             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1708             (ifa->ifa_rt_priority &&
1709              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1710             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1711                           preferred, valid))
1712                 goto nla_put_failure;
1713
1714         nlmsg_end(skb, nlh);
1715         return 0;
1716
1717 nla_put_failure:
1718         nlmsg_cancel(skb, nlh);
1719         return -EMSGSIZE;
1720 }
1721
1722 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1723                                       struct inet_fill_args *fillargs,
1724                                       struct net **tgt_net, struct sock *sk,
1725                                       struct netlink_callback *cb)
1726 {
1727         struct netlink_ext_ack *extack = cb->extack;
1728         struct nlattr *tb[IFA_MAX+1];
1729         struct ifaddrmsg *ifm;
1730         int err, i;
1731
1732         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1733                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1734                 return -EINVAL;
1735         }
1736
1737         ifm = nlmsg_data(nlh);
1738         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1739                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1740                 return -EINVAL;
1741         }
1742
1743         fillargs->ifindex = ifm->ifa_index;
1744         if (fillargs->ifindex) {
1745                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1746                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1747         }
1748
1749         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1750                                             ifa_ipv4_policy, extack);
1751         if (err < 0)
1752                 return err;
1753
1754         for (i = 0; i <= IFA_MAX; ++i) {
1755                 if (!tb[i])
1756                         continue;
1757
1758                 if (i == IFA_TARGET_NETNSID) {
1759                         struct net *net;
1760
1761                         fillargs->netnsid = nla_get_s32(tb[i]);
1762
1763                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1764                         if (IS_ERR(net)) {
1765                                 fillargs->netnsid = -1;
1766                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1767                                 return PTR_ERR(net);
1768                         }
1769                         *tgt_net = net;
1770                 } else {
1771                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1772                         return -EINVAL;
1773                 }
1774         }
1775
1776         return 0;
1777 }
1778
1779 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1780                             struct netlink_callback *cb, int s_ip_idx,
1781                             struct inet_fill_args *fillargs)
1782 {
1783         struct in_ifaddr *ifa;
1784         int ip_idx = 0;
1785         int err;
1786
1787         in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1788                 if (ip_idx < s_ip_idx) {
1789                         ip_idx++;
1790                         continue;
1791                 }
1792                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1793                 if (err < 0)
1794                         goto done;
1795
1796                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1797                 ip_idx++;
1798         }
1799         err = 0;
1800
1801 done:
1802         cb->args[2] = ip_idx;
1803
1804         return err;
1805 }
1806
1807 /* Combine dev_addr_genid and dev_base_seq to detect changes.
1808  */
1809 static u32 inet_base_seq(const struct net *net)
1810 {
1811         u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1812                   net->dev_base_seq;
1813
1814         /* Must not return 0 (see nl_dump_check_consistent()).
1815          * Chose a value far away from 0.
1816          */
1817         if (!res)
1818                 res = 0x80000000;
1819         return res;
1820 }
1821
1822 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1823 {
1824         const struct nlmsghdr *nlh = cb->nlh;
1825         struct inet_fill_args fillargs = {
1826                 .portid = NETLINK_CB(cb->skb).portid,
1827                 .seq = nlh->nlmsg_seq,
1828                 .event = RTM_NEWADDR,
1829                 .flags = NLM_F_MULTI,
1830                 .netnsid = -1,
1831         };
1832         struct net *net = sock_net(skb->sk);
1833         struct net *tgt_net = net;
1834         int h, s_h;
1835         int idx, s_idx;
1836         int s_ip_idx;
1837         struct net_device *dev;
1838         struct in_device *in_dev;
1839         struct hlist_head *head;
1840         int err = 0;
1841
1842         s_h = cb->args[0];
1843         s_idx = idx = cb->args[1];
1844         s_ip_idx = cb->args[2];
1845
1846         if (cb->strict_check) {
1847                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1848                                                  skb->sk, cb);
1849                 if (err < 0)
1850                         goto put_tgt_net;
1851
1852                 err = 0;
1853                 if (fillargs.ifindex) {
1854                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1855                         if (!dev) {
1856                                 err = -ENODEV;
1857                                 goto put_tgt_net;
1858                         }
1859
1860                         in_dev = __in_dev_get_rtnl(dev);
1861                         if (in_dev) {
1862                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1863                                                        &fillargs);
1864                         }
1865                         goto put_tgt_net;
1866                 }
1867         }
1868
1869         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1870                 idx = 0;
1871                 head = &tgt_net->dev_index_head[h];
1872                 rcu_read_lock();
1873                 cb->seq = inet_base_seq(tgt_net);
1874                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1875                         if (idx < s_idx)
1876                                 goto cont;
1877                         if (h > s_h || idx > s_idx)
1878                                 s_ip_idx = 0;
1879                         in_dev = __in_dev_get_rcu(dev);
1880                         if (!in_dev)
1881                                 goto cont;
1882
1883                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1884                                                &fillargs);
1885                         if (err < 0) {
1886                                 rcu_read_unlock();
1887                                 goto done;
1888                         }
1889 cont:
1890                         idx++;
1891                 }
1892                 rcu_read_unlock();
1893         }
1894
1895 done:
1896         cb->args[0] = h;
1897         cb->args[1] = idx;
1898 put_tgt_net:
1899         if (fillargs.netnsid >= 0)
1900                 put_net(tgt_net);
1901
1902         return skb->len ? : err;
1903 }
1904
1905 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1906                       u32 portid)
1907 {
1908         struct inet_fill_args fillargs = {
1909                 .portid = portid,
1910                 .seq = nlh ? nlh->nlmsg_seq : 0,
1911                 .event = event,
1912                 .flags = 0,
1913                 .netnsid = -1,
1914         };
1915         struct sk_buff *skb;
1916         int err = -ENOBUFS;
1917         struct net *net;
1918
1919         net = dev_net(ifa->ifa_dev->dev);
1920         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1921         if (!skb)
1922                 goto errout;
1923
1924         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1925         if (err < 0) {
1926                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1927                 WARN_ON(err == -EMSGSIZE);
1928                 kfree_skb(skb);
1929                 goto errout;
1930         }
1931         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1932         return;
1933 errout:
1934         if (err < 0)
1935                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1936 }
1937
1938 static size_t inet_get_link_af_size(const struct net_device *dev,
1939                                     u32 ext_filter_mask)
1940 {
1941         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1942
1943         if (!in_dev)
1944                 return 0;
1945
1946         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1947 }
1948
1949 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1950                              u32 ext_filter_mask)
1951 {
1952         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1953         struct nlattr *nla;
1954         int i;
1955
1956         if (!in_dev)
1957                 return -ENODATA;
1958
1959         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1960         if (!nla)
1961                 return -EMSGSIZE;
1962
1963         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1964                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1965
1966         return 0;
1967 }
1968
1969 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1970         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1971 };
1972
1973 static int inet_validate_link_af(const struct net_device *dev,
1974                                  const struct nlattr *nla,
1975                                  struct netlink_ext_ack *extack)
1976 {
1977         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1978         int err, rem;
1979
1980         if (dev && !__in_dev_get_rtnl(dev))
1981                 return -EAFNOSUPPORT;
1982
1983         err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1984                                           inet_af_policy, extack);
1985         if (err < 0)
1986                 return err;
1987
1988         if (tb[IFLA_INET_CONF]) {
1989                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1990                         int cfgid = nla_type(a);
1991
1992                         if (nla_len(a) < 4)
1993                                 return -EINVAL;
1994
1995                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1996                                 return -EINVAL;
1997                 }
1998         }
1999
2000         return 0;
2001 }
2002
2003 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2004                             struct netlink_ext_ack *extack)
2005 {
2006         struct in_device *in_dev = __in_dev_get_rtnl(dev);
2007         struct nlattr *a, *tb[IFLA_INET_MAX+1];
2008         int rem;
2009
2010         if (!in_dev)
2011                 return -EAFNOSUPPORT;
2012
2013         if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2014                 return -EINVAL;
2015
2016         if (tb[IFLA_INET_CONF]) {
2017                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2018                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2019         }
2020
2021         return 0;
2022 }
2023
2024 static int inet_netconf_msgsize_devconf(int type)
2025 {
2026         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2027                    + nla_total_size(4); /* NETCONFA_IFINDEX */
2028         bool all = false;
2029
2030         if (type == NETCONFA_ALL)
2031                 all = true;
2032
2033         if (all || type == NETCONFA_FORWARDING)
2034                 size += nla_total_size(4);
2035         if (all || type == NETCONFA_RP_FILTER)
2036                 size += nla_total_size(4);
2037         if (all || type == NETCONFA_MC_FORWARDING)
2038                 size += nla_total_size(4);
2039         if (all || type == NETCONFA_BC_FORWARDING)
2040                 size += nla_total_size(4);
2041         if (all || type == NETCONFA_PROXY_NEIGH)
2042                 size += nla_total_size(4);
2043         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2044                 size += nla_total_size(4);
2045
2046         return size;
2047 }
2048
2049 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2050                                      struct ipv4_devconf *devconf, u32 portid,
2051                                      u32 seq, int event, unsigned int flags,
2052                                      int type)
2053 {
2054         struct nlmsghdr  *nlh;
2055         struct netconfmsg *ncm;
2056         bool all = false;
2057
2058         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2059                         flags);
2060         if (!nlh)
2061                 return -EMSGSIZE;
2062
2063         if (type == NETCONFA_ALL)
2064                 all = true;
2065
2066         ncm = nlmsg_data(nlh);
2067         ncm->ncm_family = AF_INET;
2068
2069         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2070                 goto nla_put_failure;
2071
2072         if (!devconf)
2073                 goto out;
2074
2075         if ((all || type == NETCONFA_FORWARDING) &&
2076             nla_put_s32(skb, NETCONFA_FORWARDING,
2077                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2078                 goto nla_put_failure;
2079         if ((all || type == NETCONFA_RP_FILTER) &&
2080             nla_put_s32(skb, NETCONFA_RP_FILTER,
2081                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2082                 goto nla_put_failure;
2083         if ((all || type == NETCONFA_MC_FORWARDING) &&
2084             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2085                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2086                 goto nla_put_failure;
2087         if ((all || type == NETCONFA_BC_FORWARDING) &&
2088             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2089                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2090                 goto nla_put_failure;
2091         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2092             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2093                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2094                 goto nla_put_failure;
2095         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2096             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2097                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2098                 goto nla_put_failure;
2099
2100 out:
2101         nlmsg_end(skb, nlh);
2102         return 0;
2103
2104 nla_put_failure:
2105         nlmsg_cancel(skb, nlh);
2106         return -EMSGSIZE;
2107 }
2108
2109 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2110                                  int ifindex, struct ipv4_devconf *devconf)
2111 {
2112         struct sk_buff *skb;
2113         int err = -ENOBUFS;
2114
2115         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2116         if (!skb)
2117                 goto errout;
2118
2119         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2120                                         event, 0, type);
2121         if (err < 0) {
2122                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2123                 WARN_ON(err == -EMSGSIZE);
2124                 kfree_skb(skb);
2125                 goto errout;
2126         }
2127         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2128         return;
2129 errout:
2130         if (err < 0)
2131                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2132 }
2133
2134 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2135         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2136         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2137         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2138         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2139         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2140 };
2141
2142 static int inet_netconf_valid_get_req(struct sk_buff *skb,
2143                                       const struct nlmsghdr *nlh,
2144                                       struct nlattr **tb,
2145                                       struct netlink_ext_ack *extack)
2146 {
2147         int i, err;
2148
2149         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2150                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2151                 return -EINVAL;
2152         }
2153
2154         if (!netlink_strict_get_check(skb))
2155                 return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2156                                               tb, NETCONFA_MAX,
2157                                               devconf_ipv4_policy, extack);
2158
2159         err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2160                                             tb, NETCONFA_MAX,
2161                                             devconf_ipv4_policy, extack);
2162         if (err)
2163                 return err;
2164
2165         for (i = 0; i <= NETCONFA_MAX; i++) {
2166                 if (!tb[i])
2167                         continue;
2168
2169                 switch (i) {
2170                 case NETCONFA_IFINDEX:
2171                         break;
2172                 default:
2173                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2174                         return -EINVAL;
2175                 }
2176         }
2177
2178         return 0;
2179 }
2180
2181 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2182                                     struct nlmsghdr *nlh,
2183                                     struct netlink_ext_ack *extack)
2184 {
2185         struct net *net = sock_net(in_skb->sk);
2186         struct nlattr *tb[NETCONFA_MAX+1];
2187         struct sk_buff *skb;
2188         struct ipv4_devconf *devconf;
2189         struct in_device *in_dev;
2190         struct net_device *dev;
2191         int ifindex;
2192         int err;
2193
2194         err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2195         if (err)
2196                 goto errout;
2197
2198         err = -EINVAL;
2199         if (!tb[NETCONFA_IFINDEX])
2200                 goto errout;
2201
2202         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2203         switch (ifindex) {
2204         case NETCONFA_IFINDEX_ALL:
2205                 devconf = net->ipv4.devconf_all;
2206                 break;
2207         case NETCONFA_IFINDEX_DEFAULT:
2208                 devconf = net->ipv4.devconf_dflt;
2209                 break;
2210         default:
2211                 dev = __dev_get_by_index(net, ifindex);
2212                 if (!dev)
2213                         goto errout;
2214                 in_dev = __in_dev_get_rtnl(dev);
2215                 if (!in_dev)
2216                         goto errout;
2217                 devconf = &in_dev->cnf;
2218                 break;
2219         }
2220
2221         err = -ENOBUFS;
2222         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2223         if (!skb)
2224                 goto errout;
2225
2226         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2227                                         NETLINK_CB(in_skb).portid,
2228                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2229                                         NETCONFA_ALL);
2230         if (err < 0) {
2231                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2232                 WARN_ON(err == -EMSGSIZE);
2233                 kfree_skb(skb);
2234                 goto errout;
2235         }
2236         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2237 errout:
2238         return err;
2239 }
2240
2241 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2242                                      struct netlink_callback *cb)
2243 {
2244         const struct nlmsghdr *nlh = cb->nlh;
2245         struct net *net = sock_net(skb->sk);
2246         int h, s_h;
2247         int idx, s_idx;
2248         struct net_device *dev;
2249         struct in_device *in_dev;
2250         struct hlist_head *head;
2251
2252         if (cb->strict_check) {
2253                 struct netlink_ext_ack *extack = cb->extack;
2254                 struct netconfmsg *ncm;
2255
2256                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2257                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2258                         return -EINVAL;
2259                 }
2260
2261                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2262                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2263                         return -EINVAL;
2264                 }
2265         }
2266
2267         s_h = cb->args[0];
2268         s_idx = idx = cb->args[1];
2269
2270         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2271                 idx = 0;
2272                 head = &net->dev_index_head[h];
2273                 rcu_read_lock();
2274                 cb->seq = inet_base_seq(net);
2275                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2276                         if (idx < s_idx)
2277                                 goto cont;
2278                         in_dev = __in_dev_get_rcu(dev);
2279                         if (!in_dev)
2280                                 goto cont;
2281
2282                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2283                                                       &in_dev->cnf,
2284                                                       NETLINK_CB(cb->skb).portid,
2285                                                       nlh->nlmsg_seq,
2286                                                       RTM_NEWNETCONF,
2287                                                       NLM_F_MULTI,
2288                                                       NETCONFA_ALL) < 0) {
2289                                 rcu_read_unlock();
2290                                 goto done;
2291                         }
2292                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2293 cont:
2294                         idx++;
2295                 }
2296                 rcu_read_unlock();
2297         }
2298         if (h == NETDEV_HASHENTRIES) {
2299                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2300                                               net->ipv4.devconf_all,
2301                                               NETLINK_CB(cb->skb).portid,
2302                                               nlh->nlmsg_seq,
2303                                               RTM_NEWNETCONF, NLM_F_MULTI,
2304                                               NETCONFA_ALL) < 0)
2305                         goto done;
2306                 else
2307                         h++;
2308         }
2309         if (h == NETDEV_HASHENTRIES + 1) {
2310                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2311                                               net->ipv4.devconf_dflt,
2312                                               NETLINK_CB(cb->skb).portid,
2313                                               nlh->nlmsg_seq,
2314                                               RTM_NEWNETCONF, NLM_F_MULTI,
2315                                               NETCONFA_ALL) < 0)
2316                         goto done;
2317                 else
2318                         h++;
2319         }
2320 done:
2321         cb->args[0] = h;
2322         cb->args[1] = idx;
2323
2324         return skb->len;
2325 }
2326
2327 #ifdef CONFIG_SYSCTL
2328
2329 static void devinet_copy_dflt_conf(struct net *net, int i)
2330 {
2331         struct net_device *dev;
2332
2333         rcu_read_lock();
2334         for_each_netdev_rcu(net, dev) {
2335                 struct in_device *in_dev;
2336
2337                 in_dev = __in_dev_get_rcu(dev);
2338                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2339                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2340         }
2341         rcu_read_unlock();
2342 }
2343
2344 /* called with RTNL locked */
2345 static void inet_forward_change(struct net *net)
2346 {
2347         struct net_device *dev;
2348         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2349
2350         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2351         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2352         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2353                                     NETCONFA_FORWARDING,
2354                                     NETCONFA_IFINDEX_ALL,
2355                                     net->ipv4.devconf_all);
2356         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2357                                     NETCONFA_FORWARDING,
2358                                     NETCONFA_IFINDEX_DEFAULT,
2359                                     net->ipv4.devconf_dflt);
2360
2361         for_each_netdev(net, dev) {
2362                 struct in_device *in_dev;
2363
2364                 if (on)
2365                         dev_disable_lro(dev);
2366
2367                 in_dev = __in_dev_get_rtnl(dev);
2368                 if (in_dev) {
2369                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2370                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2371                                                     NETCONFA_FORWARDING,
2372                                                     dev->ifindex, &in_dev->cnf);
2373                 }
2374         }
2375 }
2376
2377 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2378 {
2379         if (cnf == net->ipv4.devconf_dflt)
2380                 return NETCONFA_IFINDEX_DEFAULT;
2381         else if (cnf == net->ipv4.devconf_all)
2382                 return NETCONFA_IFINDEX_ALL;
2383         else {
2384                 struct in_device *idev
2385                         = container_of(cnf, struct in_device, cnf);
2386                 return idev->dev->ifindex;
2387         }
2388 }
2389
2390 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2391                              void *buffer, size_t *lenp, loff_t *ppos)
2392 {
2393         int old_value = *(int *)ctl->data;
2394         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2395         int new_value = *(int *)ctl->data;
2396
2397         if (write) {
2398                 struct ipv4_devconf *cnf = ctl->extra1;
2399                 struct net *net = ctl->extra2;
2400                 int i = (int *)ctl->data - cnf->data;
2401                 int ifindex;
2402
2403                 set_bit(i, cnf->state);
2404
2405                 if (cnf == net->ipv4.devconf_dflt)
2406                         devinet_copy_dflt_conf(net, i);
2407                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2408                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2409                         if ((new_value == 0) && (old_value != 0))
2410                                 rt_cache_flush(net);
2411
2412                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2413                     new_value != old_value)
2414                         rt_cache_flush(net);
2415
2416                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2417                     new_value != old_value) {
2418                         ifindex = devinet_conf_ifindex(net, cnf);
2419                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2420                                                     NETCONFA_RP_FILTER,
2421                                                     ifindex, cnf);
2422                 }
2423                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2424                     new_value != old_value) {
2425                         ifindex = devinet_conf_ifindex(net, cnf);
2426                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2427                                                     NETCONFA_PROXY_NEIGH,
2428                                                     ifindex, cnf);
2429                 }
2430                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2431                     new_value != old_value) {
2432                         ifindex = devinet_conf_ifindex(net, cnf);
2433                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2434                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2435                                                     ifindex, cnf);
2436                 }
2437         }
2438
2439         return ret;
2440 }
2441
2442 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2443                                   void *buffer, size_t *lenp, loff_t *ppos)
2444 {
2445         int *valp = ctl->data;
2446         int val = *valp;
2447         loff_t pos = *ppos;
2448         struct net *net = ctl->extra2;
2449         int ret;
2450
2451         if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2452                 return -EPERM;
2453
2454         ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2455
2456         if (write && *valp != val) {
2457                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2458                         if (!rtnl_trylock()) {
2459                                 /* Restore the original values before restarting */
2460                                 *valp = val;
2461                                 *ppos = pos;
2462                                 return restart_syscall();
2463                         }
2464                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2465                                 inet_forward_change(net);
2466                         } else {
2467                                 struct ipv4_devconf *cnf = ctl->extra1;
2468                                 struct in_device *idev =
2469                                         container_of(cnf, struct in_device, cnf);
2470                                 if (*valp)
2471                                         dev_disable_lro(idev->dev);
2472                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2473                                                             NETCONFA_FORWARDING,
2474                                                             idev->dev->ifindex,
2475                                                             cnf);
2476                         }
2477                         rtnl_unlock();
2478                         rt_cache_flush(net);
2479                 } else
2480                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2481                                                     NETCONFA_FORWARDING,
2482                                                     NETCONFA_IFINDEX_DEFAULT,
2483                                                     net->ipv4.devconf_dflt);
2484         }
2485
2486         return ret;
2487 }
2488
2489 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2490                                 void *buffer, size_t *lenp, loff_t *ppos)
2491 {
2492         int *valp = ctl->data;
2493         int val = *valp;
2494         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2495         struct net *net = ctl->extra2;
2496
2497         if (write && *valp != val)
2498                 rt_cache_flush(net);
2499
2500         return ret;
2501 }
2502
/*
 * Builders for the ctl_table entries of the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY() emits one initializer whose .data points at the
 * slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf.data array and
 * whose .extra1 points at ipv4_devconf itself.  Parameter names are chosen
 * so that they never collide with a ctl_table field name used inside the
 * initializer.
 *
 *   RW_ENTRY       mode 0644, handled by devinet_conf_proc
 *   RO_ENTRY       mode 0444, handled by devinet_conf_proc
 *   COMPLEX_ENTRY  mode 0644, caller-supplied handler
 *   FLUSHING_ENTRY mode 0644, handled by ipv4_doint_and_flush
 */
#define DEVINET_SYSCTL_ENTRY(attr, pname, perm, handler) \
        { \
                .procname       = pname, \
                .data           = &ipv4_devconf.data[ \
                                        IPV4_DEVCONF_ ## attr - 1], \
                .maxlen         = sizeof(int), \
                .mode           = perm, \
                .proc_handler   = handler, \
                .extra1         = &ipv4_devconf, \
        }

#define DEVINET_SYSCTL_RW_ENTRY(attr, pname) \
        DEVINET_SYSCTL_ENTRY(attr, pname, 0644, devinet_conf_proc)

#define DEVINET_SYSCTL_RO_ENTRY(attr, pname) \
        DEVINET_SYSCTL_ENTRY(attr, pname, 0444, devinet_conf_proc)

#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, handler) \
        DEVINET_SYSCTL_ENTRY(attr, pname, 0644, handler)

#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, pname) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, pname, ipv4_doint_and_flush)
2525
2526 static struct devinet_sysctl_table {
2527         struct ctl_table_header *sysctl_header;
2528         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2529 } devinet_sysctl = {
2530         .devinet_vars = {
2531                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2532                                              devinet_sysctl_forward),
2533                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2534                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2535
2536                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2537                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2538                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2539                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2540                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2541                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2542                                         "accept_source_route"),
2543                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2544                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2545                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2546                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2547                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2548                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2549                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2550                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2551                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2552                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2553                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2554                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2555                 DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2556                                         "arp_evict_nocarrier"),
2557                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2558                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2559                                         "force_igmp_version"),
2560                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2561                                         "igmpv2_unsolicited_report_interval"),
2562                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2563                                         "igmpv3_unsolicited_report_interval"),
2564                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2565                                         "ignore_routes_with_linkdown"),
2566                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2567                                         "drop_gratuitous_arp"),
2568
2569                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2570                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2571                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2572                                               "promote_secondaries"),
2573                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2574                                               "route_localnet"),
2575                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2576                                               "drop_unicast_in_l2_multicast"),
2577         },
2578 };
2579
2580 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2581                                      int ifindex, struct ipv4_devconf *p)
2582 {
2583         int i;
2584         struct devinet_sysctl_table *t;
2585         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2586
2587         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2588         if (!t)
2589                 goto out;
2590
2591         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2592                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2593                 t->devinet_vars[i].extra1 = p;
2594                 t->devinet_vars[i].extra2 = net;
2595         }
2596
2597         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2598
2599         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2600         if (!t->sysctl_header)
2601                 goto free;
2602
2603         p->sysctl = t;
2604
2605         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2606                                     ifindex, p);
2607         return 0;
2608
2609 free:
2610         kfree(t);
2611 out:
2612         return -ENOMEM;
2613 }
2614
2615 static void __devinet_sysctl_unregister(struct net *net,
2616                                         struct ipv4_devconf *cnf, int ifindex)
2617 {
2618         struct devinet_sysctl_table *t = cnf->sysctl;
2619
2620         if (t) {
2621                 cnf->sysctl = NULL;
2622                 unregister_net_sysctl_table(t->sysctl_header);
2623                 kfree(t);
2624         }
2625
2626         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2627 }
2628
2629 static int devinet_sysctl_register(struct in_device *idev)
2630 {
2631         int err;
2632
2633         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2634                 return -EINVAL;
2635
2636         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2637         if (err)
2638                 return err;
2639         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2640                                         idev->dev->ifindex, &idev->cnf);
2641         if (err)
2642                 neigh_sysctl_unregister(idev->arp_parms);
2643         return err;
2644 }
2645
2646 static void devinet_sysctl_unregister(struct in_device *idev)
2647 {
2648         struct net *net = dev_net(idev->dev);
2649
2650         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2651         neigh_sysctl_unregister(idev->arp_parms);
2652 }
2653
2654 static struct ctl_table ctl_forward_entry[] = {
2655         {
2656                 .procname       = "ip_forward",
2657                 .data           = &ipv4_devconf.data[
2658                                         IPV4_DEVCONF_FORWARDING - 1],
2659                 .maxlen         = sizeof(int),
2660                 .mode           = 0644,
2661                 .proc_handler   = devinet_sysctl_forward,
2662                 .extra1         = &ipv4_devconf,
2663                 .extra2         = &init_net,
2664         },
2665         { },
2666 };
2667 #endif
2668
2669 static __net_init int devinet_init_net(struct net *net)
2670 {
2671         int err;
2672         struct ipv4_devconf *all, *dflt;
2673 #ifdef CONFIG_SYSCTL
2674         struct ctl_table *tbl;
2675         struct ctl_table_header *forw_hdr;
2676 #endif
2677
2678         err = -ENOMEM;
2679         all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2680         if (!all)
2681                 goto err_alloc_all;
2682
2683         dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2684         if (!dflt)
2685                 goto err_alloc_dflt;
2686
2687 #ifdef CONFIG_SYSCTL
2688         tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2689         if (!tbl)
2690                 goto err_alloc_ctl;
2691
2692         tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2693         tbl[0].extra1 = all;
2694         tbl[0].extra2 = net;
2695 #endif
2696
2697         if (!net_eq(net, &init_net)) {
2698                 switch (net_inherit_devconf()) {
2699                 case 3:
2700                         /* copy from the current netns */
2701                         memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2702                                sizeof(ipv4_devconf));
2703                         memcpy(dflt,
2704                                current->nsproxy->net_ns->ipv4.devconf_dflt,
2705                                sizeof(ipv4_devconf_dflt));
2706                         break;
2707                 case 0:
2708                 case 1:
2709                         /* copy from init_net */
2710                         memcpy(all, init_net.ipv4.devconf_all,
2711                                sizeof(ipv4_devconf));
2712                         memcpy(dflt, init_net.ipv4.devconf_dflt,
2713                                sizeof(ipv4_devconf_dflt));
2714                         break;
2715                 case 2:
2716                         /* use compiled values */
2717                         break;
2718                 }
2719         }
2720
2721 #ifdef CONFIG_SYSCTL
2722         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2723         if (err < 0)
2724                 goto err_reg_all;
2725
2726         err = __devinet_sysctl_register(net, "default",
2727                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2728         if (err < 0)
2729                 goto err_reg_dflt;
2730
2731         err = -ENOMEM;
2732         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2733         if (!forw_hdr)
2734                 goto err_reg_ctl;
2735         net->ipv4.forw_hdr = forw_hdr;
2736 #endif
2737
2738         net->ipv4.devconf_all = all;
2739         net->ipv4.devconf_dflt = dflt;
2740         return 0;
2741
2742 #ifdef CONFIG_SYSCTL
2743 err_reg_ctl:
2744         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2745 err_reg_dflt:
2746         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2747 err_reg_all:
2748         kfree(tbl);
2749 err_alloc_ctl:
2750 #endif
2751         kfree(dflt);
2752 err_alloc_dflt:
2753         kfree(all);
2754 err_alloc_all:
2755         return err;
2756 }
2757
2758 static __net_exit void devinet_exit_net(struct net *net)
2759 {
2760 #ifdef CONFIG_SYSCTL
2761         struct ctl_table *tbl;
2762
2763         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2764         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2765         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2766                                     NETCONFA_IFINDEX_DEFAULT);
2767         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2768                                     NETCONFA_IFINDEX_ALL);
2769         kfree(tbl);
2770 #endif
2771         kfree(net->ipv4.devconf_dflt);
2772         kfree(net->ipv4.devconf_all);
2773 }
2774
2775 static __net_initdata struct pernet_operations devinet_ops = {
2776         .init = devinet_init_net,
2777         .exit = devinet_exit_net,
2778 };
2779
2780 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2781         .family           = AF_INET,
2782         .fill_link_af     = inet_fill_link_af,
2783         .get_link_af_size = inet_get_link_af_size,
2784         .validate_link_af = inet_validate_link_af,
2785         .set_link_af      = inet_set_link_af,
2786 };
2787
2788 void __init devinet_init(void)
2789 {
2790         int i;
2791
2792         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2793                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2794
2795         register_pernet_subsys(&devinet_ops);
2796         register_netdevice_notifier(&ip_netdev_notifier);
2797
2798         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2799
2800         rtnl_af_register(&inet_af_ops);
2801
2802         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2803         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2804         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2805         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2806                       inet_netconf_dump_devconf, 0);
2807 }