GNU Linux-libre 4.4.284-gnu1
[releases.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/* Address flags that are only meaningful for IPv6 addresses. */
#define IPV6ONLY_FLAGS  \
                (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
                 IFA_F_NODAD | IFA_F_HOMEADDRESS | \
                 IFA_F_STABLE_PRIVACY | IFA_F_MANAGETEMPADDR)
74
75 static struct ipv4_devconf ipv4_devconf = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
82                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
83         },
84 };
85
86 static struct ipv4_devconf ipv4_devconf_dflt = {
87         .data = {
88                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
89                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
90                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
91                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
92                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
93                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
94                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
95         },
96 };
97
/*
 * IPV4_DEVCONF_DFLT - access one attribute of a namespace's default devconf
 * @net:  pointer to the struct net whose ipv4.devconf_dflt is dereferenced
 * @attr: attribute name, handed through unchanged to IPV4_DEVCONF()
 *
 * @net is parenthesized so that any pointer expression (not just a plain
 * identifier) can be passed without the '*' and '->' binding to the wrong
 * operand.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
100
101 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
102         [IFA_LOCAL]             = { .type = NLA_U32 },
103         [IFA_ADDRESS]           = { .type = NLA_U32 },
104         [IFA_BROADCAST]         = { .type = NLA_U32 },
105         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
106         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
107         [IFA_FLAGS]             = { .type = NLA_U32 },
108 };
109
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
110 #define IN4_ADDR_HSIZE_SHIFT    8
111 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
112
/*
 * Hash table of in_ifaddr entries, linked through their ->hash member and
 * indexed by inet_addr_hash() of the address' ifa_local.
 */
113 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
114
115 static u32 inet_addr_hash(const struct net *net, __be32 addr)
116 {
117         u32 val = (__force u32) addr ^ net_hash_mix(net);
118
119         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
120 }
121
122 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
123 {
124         u32 hash = inet_addr_hash(net, ifa->ifa_local);
125
126         ASSERT_RTNL();
127         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
128 }
129
130 static void inet_hash_remove(struct in_ifaddr *ifa)
131 {
132         ASSERT_RTNL();
133         hlist_del_init_rcu(&ifa->hash);
134 }
135
136 /**
137  * __ip_dev_find - find the first device with a given source address.
138  * @net: the net namespace
139  * @addr: the source address
140  * @devref: if true, take a reference on the found device
141  *
142  * If a caller uses devref=false, it should be protected by RCU, or RTNL
143  */
144 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
145 {
146         u32 hash = inet_addr_hash(net, addr);
147         struct net_device *result = NULL;
148         struct in_ifaddr *ifa;
149
150         rcu_read_lock();
151         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
152                 if (ifa->ifa_local == addr) {
153                         struct net_device *dev = ifa->ifa_dev->dev;
154
155                         if (!net_eq(dev_net(dev), net))
156                                 continue;
157                         result = dev;
158                         break;
159                 }
160         }
161         if (!result) {
162                 struct flowi4 fl4 = { .daddr = addr };
163                 struct fib_result res = { 0 };
164                 struct fib_table *local;
165
166                 /* Fallback to FIB local table so that communication
167                  * over loopback subnets work.
168                  */
169                 local = fib_get_table(net, RT_TABLE_LOCAL);
170                 if (local &&
171                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
172                     res.type == RTN_LOCAL)
173                         result = FIB_RES_DEV(res);
174         }
175         if (result && devref)
176                 dev_hold(result);
177         rcu_read_unlock();
178         return result;
179 }
180 EXPORT_SYMBOL(__ip_dev_find);
181
/* Forward declarations for helpers that are used before their definition. */
182 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
183
/* Notifier chain that is called with NETDEV_UP / NETDEV_DOWN and the ifaddr. */
184 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
185 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
186                          int destroy);
187 #ifdef CONFIG_SYSCTL
188 static int devinet_sysctl_register(struct in_device *idev);
189 static void devinet_sysctl_unregister(struct in_device *idev);
190 #else
/* Without CONFIG_SYSCTL both hooks are no-ops; registration reports success. */
191 static int devinet_sysctl_register(struct in_device *idev)
192 {
193         return 0;
194 }
195 static void devinet_sysctl_unregister(struct in_device *idev)
196 {
197 }
198 #endif
199
200 /* Locks all the inet devices. */
201
202 static struct in_ifaddr *inet_alloc_ifa(void)
203 {
204         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
205 }
206
207 static void inet_rcu_free_ifa(struct rcu_head *head)
208 {
209         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
210         if (ifa->ifa_dev)
211                 in_dev_put(ifa->ifa_dev);
212         kfree(ifa);
213 }
214
215 static void inet_free_ifa(struct in_ifaddr *ifa)
216 {
217         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
218 }
219
220 void in_dev_finish_destroy(struct in_device *idev)
221 {
222         struct net_device *dev = idev->dev;
223
224         WARN_ON(idev->ifa_list);
225         WARN_ON(idev->mc_list);
226         kfree(rcu_dereference_protected(idev->mc_hash, 1));
227 #ifdef NET_REFCNT_DEBUG
228         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
229 #endif
230         dev_put(dev);
231         if (!idev->dead)
232                 pr_err("Freeing alive in_device %p\n", idev);
233         else
234                 kfree(idev);
235 }
236 EXPORT_SYMBOL(in_dev_finish_destroy);
237
238 static struct in_device *inetdev_init(struct net_device *dev)
239 {
240         struct in_device *in_dev;
241         int err = -ENOMEM;
242
243         ASSERT_RTNL();
244
245         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
246         if (!in_dev)
247                 goto out;
248         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
249                         sizeof(in_dev->cnf));
250         in_dev->cnf.sysctl = NULL;
251         in_dev->dev = dev;
252         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
253         if (!in_dev->arp_parms)
254                 goto out_kfree;
255         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
256                 dev_disable_lro(dev);
257         /* Reference in_dev->dev */
258         dev_hold(dev);
259         /* Account for reference dev->ip_ptr (below) */
260         in_dev_hold(in_dev);
261
262         err = devinet_sysctl_register(in_dev);
263         if (err) {
264                 in_dev->dead = 1;
265                 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
266                 in_dev_put(in_dev);
267                 in_dev = NULL;
268                 goto out;
269         }
270         ip_mc_init_dev(in_dev);
271         if (dev->flags & IFF_UP)
272                 ip_mc_up(in_dev);
273
274         /* we can receive as soon as ip_ptr is set -- do this last */
275         rcu_assign_pointer(dev->ip_ptr, in_dev);
276 out:
277         return in_dev ?: ERR_PTR(err);
278 out_kfree:
279         kfree(in_dev);
280         in_dev = NULL;
281         goto out;
282 }
283
284 static void in_dev_rcu_put(struct rcu_head *head)
285 {
286         struct in_device *idev = container_of(head, struct in_device, rcu_head);
287         in_dev_put(idev);
288 }
289
290 static void inetdev_destroy(struct in_device *in_dev)
291 {
292         struct in_ifaddr *ifa;
293         struct net_device *dev;
294
295         ASSERT_RTNL();
296
297         dev = in_dev->dev;
298
299         in_dev->dead = 1;
300
301         ip_mc_destroy_dev(in_dev);
302
303         while ((ifa = in_dev->ifa_list) != NULL) {
304                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
305                 inet_free_ifa(ifa);
306         }
307
308         RCU_INIT_POINTER(dev->ip_ptr, NULL);
309
310         devinet_sysctl_unregister(in_dev);
311         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
312         arp_ifdown(dev);
313
314         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
315 }
316
317 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
318 {
319         rcu_read_lock();
320         for_primary_ifa(in_dev) {
321                 if (inet_ifa_match(a, ifa)) {
322                         if (!b || inet_ifa_match(b, ifa)) {
323                                 rcu_read_unlock();
324                                 return 1;
325                         }
326                 }
327         } endfor_ifa(in_dev);
328         rcu_read_unlock();
329         return 0;
330 }
331
/*
 * __inet_del_ifa - unlink the address *ifap from in_dev's address list
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the ->ifa_next / list head slot) that points
 *           at the address to remove
 * @destroy: when non-zero the removed address is also freed here via
 *           inet_free_ifa(); otherwise freeing is left to the caller
 * @nlh, @portid: handed through to rtmsg_ifa() for the notifications
 *
 * The deletion is announced with RTM_DELADDR and a NETDEV_DOWN notifier
 * call.  If the removed address is a primary one, the secondaries of its
 * subnet are either deleted as well or, when
 * IN_DEV_PROMOTE_SECONDARIES(in_dev) is set, the first of them is promoted
 * to primary.  RTNL must be held.
 */
332 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
333                          int destroy, struct nlmsghdr *nlh, u32 portid)
334 {
335         struct in_ifaddr *promote = NULL;
336         struct in_ifaddr *ifa, *ifa1 = *ifap;
337         struct in_ifaddr *last_prim = in_dev->ifa_list;
338         struct in_ifaddr *prev_prom = NULL;
339         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
340
341         ASSERT_RTNL();
342
            /* A dead device gets no secondary handling, just the unlink. */
343         if (in_dev->dead)
344                 goto no_promotions;
345
346         /* 1. Deleting primary ifaddr forces deletion of all secondaries
347          * unless alias promotion is set
348          **/
349
350         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
351                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
352
353                 while ((ifa = *ifap1) != NULL) {
                            /* Remember the last primary whose ifa_scope is
                             * >= ifa1's; a promoted address is relinked
                             * right after it further down.
                             */
354                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
355                             ifa1->ifa_scope <= ifa->ifa_scope)
356                                 last_prim = ifa;
357
                            /* Step over primaries and over addresses that
                             * are not in ifa1's subnet.
                             */
358                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
359                             ifa1->ifa_mask != ifa->ifa_mask ||
360                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
361                                 ifap1 = &ifa->ifa_next;
362                                 prev_prom = ifa;
363                                 continue;
364                         }
365
                            /* ifa is a secondary of ifa1's subnet: delete
                             * it, or pick it for promotion and stop.
                             */
366                         if (!do_promote) {
367                                 inet_hash_remove(ifa);
368                                 *ifap1 = ifa->ifa_next;
369
370                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
371                                 blocking_notifier_call_chain(&inetaddr_chain,
372                                                 NETDEV_DOWN, ifa);
373                                 inet_free_ifa(ifa);
374                         } else {
375                                 promote = ifa;
376                                 break;
377                         }
378                 }
379         }
380
381         /* On promotion all secondaries from subnet are changing
382          * the primary IP, we must remove all their routes silently
383          * and later to add them back with new prefsrc. Do this
384          * while all addresses are on the device list.
385          */
386         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
387                 if (ifa1->ifa_mask == ifa->ifa_mask &&
388                     inet_ifa_match(ifa1->ifa_address, ifa))
389                         fib_del_ifaddr(ifa, ifa1);
390         }
391
392 no_promotions:
393         /* 2. Unlink it */
394
395         *ifap = ifa1->ifa_next;
396         inet_hash_remove(ifa1);
397
398         /* 3. Announce address deletion */
399
400         /* Send message first, then call notifier.
401            At first sight, FIB update triggered by notifier
402            will refer to already deleted ifaddr, that could confuse
403            netlink listeners. It is not true: look, gated sees
404            that route deleted and if it still thinks that ifaddr
405            is valid, it will try to restore deleted routes... Grr.
406            So that, this order is correct.
407          */
408         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
409         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
410
411         if (promote) {
                    /* Saved before promote is possibly relinked below. */
412                 struct in_ifaddr *next_sec = promote->ifa_next;
413
                    /* prev_prom is only set when entries were stepped over
                     * between ifa1 and promote; in that case move promote
                     * to just after last_prim.  Otherwise promote directly
                     * followed ifa1 and stays where it is.
                     */
414                 if (prev_prom) {
415                         prev_prom->ifa_next = promote->ifa_next;
416                         promote->ifa_next = last_prim->ifa_next;
417                         last_prim->ifa_next = promote;
418                 }
419
420                 promote->ifa_flags &= ~IFA_F_SECONDARY;
421                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
422                 blocking_notifier_call_chain(&inetaddr_chain,
423                                 NETDEV_UP, promote);
                    /* Re-add the remaining addresses of ifa1's subnet to the
                     * FIB (see the fib_del_ifaddr() loop above).
                     */
424                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
425                         if (ifa1->ifa_mask != ifa->ifa_mask ||
426                             !inet_ifa_match(ifa1->ifa_address, ifa))
427                                         continue;
428                         fib_add_ifaddr(ifa);
429                 }
430
431         }
432         if (destroy)
433                 inet_free_ifa(ifa1);
434 }
435
436 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
437                          int destroy)
438 {
439         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
440 }
441
/* Handler of check_lifetime_work; defined further down in this file. */
442 static void check_lifetime(struct work_struct *work);
443
/* Delayed work item that runs check_lifetime(). */
444 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
445
446 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
447                              u32 portid)
448 {
449         struct in_device *in_dev = ifa->ifa_dev;
450         struct in_ifaddr *ifa1, **ifap, **last_primary;
451
452         ASSERT_RTNL();
453
454         if (!ifa->ifa_local) {
455                 inet_free_ifa(ifa);
456                 return 0;
457         }
458
459         ifa->ifa_flags &= ~IFA_F_SECONDARY;
460         last_primary = &in_dev->ifa_list;
461
462         /* Don't set IPv6 only flags to IPv4 addresses */
463         ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
464
465         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
466              ifap = &ifa1->ifa_next) {
467                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
468                     ifa->ifa_scope <= ifa1->ifa_scope)
469                         last_primary = &ifa1->ifa_next;
470                 if (ifa1->ifa_mask == ifa->ifa_mask &&
471                     inet_ifa_match(ifa1->ifa_address, ifa)) {
472                         if (ifa1->ifa_local == ifa->ifa_local) {
473                                 inet_free_ifa(ifa);
474                                 return -EEXIST;
475                         }
476                         if (ifa1->ifa_scope != ifa->ifa_scope) {
477                                 inet_free_ifa(ifa);
478                                 return -EINVAL;
479                         }
480                         ifa->ifa_flags |= IFA_F_SECONDARY;
481                 }
482         }
483
484         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
485                 prandom_seed((__force u32) ifa->ifa_local);
486                 ifap = last_primary;
487         }
488
489         ifa->ifa_next = *ifap;
490         *ifap = ifa;
491
492         inet_hash_insert(dev_net(in_dev->dev), ifa);
493
494         cancel_delayed_work(&check_lifetime_work);
495         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
496
497         /* Send message first, then call notifier.
498            Notifier will trigger FIB update, so that
499            listeners of netlink will know about new ifaddr */
500         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
501         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
502
503         return 0;
504 }
505
506 static int inet_insert_ifa(struct in_ifaddr *ifa)
507 {
508         return __inet_insert_ifa(ifa, NULL, 0);
509 }
510
511 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
512 {
513         struct in_device *in_dev = __in_dev_get_rtnl(dev);
514
515         ASSERT_RTNL();
516
517         if (!in_dev) {
518                 inet_free_ifa(ifa);
519                 return -ENOBUFS;
520         }
521         ipv4_devconf_setall(in_dev);
522         neigh_parms_data_state_setall(in_dev->arp_parms);
523         if (ifa->ifa_dev != in_dev) {
524                 WARN_ON(ifa->ifa_dev);
525                 in_dev_hold(in_dev);
526                 ifa->ifa_dev = in_dev;
527         }
528         if (ipv4_is_loopback(ifa->ifa_local))
529                 ifa->ifa_scope = RT_SCOPE_HOST;
530         return inet_insert_ifa(ifa);
531 }
532
533 /* Caller must hold RCU or RTNL :
534  * We dont take a reference on found in_device
535  */
536 struct in_device *inetdev_by_index(struct net *net, int ifindex)
537 {
538         struct net_device *dev;
539         struct in_device *in_dev = NULL;
540
541         rcu_read_lock();
542         dev = dev_get_by_index_rcu(net, ifindex);
543         if (dev)
544                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
545         rcu_read_unlock();
546         return in_dev;
547 }
548 EXPORT_SYMBOL(inetdev_by_index);
549
550 /* Called only from RTNL semaphored context. No locks. */
551
552 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
553                                     __be32 mask)
554 {
555         ASSERT_RTNL();
556
557         for_primary_ifa(in_dev) {
558                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
559                         return ifa;
560         } endfor_ifa(in_dev);
561         return NULL;
562 }
563
564 static int ip_mc_autojoin_config(struct net *net, bool join,
565                                  const struct in_ifaddr *ifa)
566 {
567 #if defined(CONFIG_IP_MULTICAST)
568         struct ip_mreqn mreq = {
569                 .imr_multiaddr.s_addr = ifa->ifa_address,
570                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
571         };
572         struct sock *sk = net->ipv4.mc_autojoin_sk;
573         int ret;
574
575         ASSERT_RTNL();
576
577         lock_sock(sk);
578         if (join)
579                 ret = ip_mc_join_group(sk, &mreq);
580         else
581                 ret = ip_mc_leave_group(sk, &mreq);
582         release_sock(sk);
583
584         return ret;
585 #else
586         return -EOPNOTSUPP;
587 #endif
588 }
589
590 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
591 {
592         struct net *net = sock_net(skb->sk);
593         struct nlattr *tb[IFA_MAX+1];
594         struct in_device *in_dev;
595         struct ifaddrmsg *ifm;
596         struct in_ifaddr *ifa, **ifap;
597         int err = -EINVAL;
598
599         ASSERT_RTNL();
600
601         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
602         if (err < 0)
603                 goto errout;
604
605         ifm = nlmsg_data(nlh);
606         in_dev = inetdev_by_index(net, ifm->ifa_index);
607         if (!in_dev) {
608                 err = -ENODEV;
609                 goto errout;
610         }
611
612         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
613              ifap = &ifa->ifa_next) {
614                 if (tb[IFA_LOCAL] &&
615                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
616                         continue;
617
618                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
619                         continue;
620
621                 if (tb[IFA_ADDRESS] &&
622                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
623                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
624                         continue;
625
626                 if (ipv4_is_multicast(ifa->ifa_address))
627                         ip_mc_autojoin_config(net, false, ifa);
628                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
629                 return 0;
630         }
631
632         err = -EADDRNOTAVAIL;
633 errout:
634         return err;
635 }
636
/*
 * Lifetime value that stands for "no expiry"; ifa_valid_lft and
 * ifa_preferred_lft are compared against it in check_lifetime().
 */
637 #define INFINITY_LIFE_TIME      0xFFFFFFFF
638
/*
 * check_lifetime - handler of check_lifetime_work
 * @work: unused
 *
 * Walks every bucket of inet_addr_lst.  Addresses without IFA_F_PERMANENT
 * whose valid lifetime has elapsed are deleted; addresses whose preferred
 * lifetime has elapsed get IFA_F_DEPRECATED set and an RTM_NEWADDR message.
 * Each bucket is first scanned under rcu_read_lock() only; rtnl_lock() is
 * taken for a second pass only if that scan found something to change.
 * At the end the work item requeues itself.
 */
639 static void check_lifetime(struct work_struct *work)
640 {
641         unsigned long now, next, next_sec, next_sched;
642         struct in_ifaddr *ifa;
643         struct hlist_node *n;
644         int i;
645
646         now = jiffies;
            /* Latest point at which the next run happens; pulled earlier
             * below when some lifetime ends before that.
             */
647         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
648
649         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
650                 bool change_needed = false;
651
                    /* Pass 1 (RCU only): detect pending work, compute next. */
652                 rcu_read_lock();
653                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
654                         unsigned long age;
655
656                         if (ifa->ifa_flags & IFA_F_PERMANENT)
657                                 continue;
658
659                         /* We try to batch several events at once. */
                            /* age is in seconds since ifa_tstamp. */
660                         age = (now - ifa->ifa_tstamp +
661                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
662
663                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
664                             age >= ifa->ifa_valid_lft) {
                                    /* valid lifetime over: must be deleted */
665                                 change_needed = true;
666                         } else if (ifa->ifa_preferred_lft ==
667                                    INFINITY_LIFE_TIME) {
668                                 continue;
669                         } else if (age >= ifa->ifa_preferred_lft) {
                                    /* preferred lifetime over: the next event
                                     * for this address is the end of its
                                     * valid lifetime
                                     */
670                                 if (time_before(ifa->ifa_tstamp +
671                                                 ifa->ifa_valid_lft * HZ, next))
672                                         next = ifa->ifa_tstamp +
673                                                ifa->ifa_valid_lft * HZ;
674
675                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
676                                         change_needed = true;
677                         } else if (time_before(ifa->ifa_tstamp +
678                                                ifa->ifa_preferred_lft * HZ,
679                                                next)) {
                                    /* still preferred: wake up when the
                                     * preferred lifetime ends
                                     */
680                                 next = ifa->ifa_tstamp +
681                                        ifa->ifa_preferred_lft * HZ;
682                         }
683                 }
684                 rcu_read_unlock();
685                 if (!change_needed)
686                         continue;
                    /* Pass 2 (RTNL held): re-evaluate and apply the changes. */
687                 rtnl_lock();
688                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
689                         unsigned long age;
690
691                         if (ifa->ifa_flags & IFA_F_PERMANENT)
692                                 continue;
693
694                         /* We try to batch several events at once. */
695                         age = (now - ifa->ifa_tstamp +
696                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
697
698                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
699                             age >= ifa->ifa_valid_lft) {
700                                 struct in_ifaddr **ifap;
701
                                    /* inet_del_ifa() needs the link pointing
                                     * at ifa, so search the device list for it.
                                     */
702                                 for (ifap = &ifa->ifa_dev->ifa_list;
703                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
704                                         if (*ifap == ifa) {
705                                                 inet_del_ifa(ifa->ifa_dev,
706                                                              ifap, 1);
707                                                 break;
708                                         }
709                                 }
710                         } else if (ifa->ifa_preferred_lft !=
711                                    INFINITY_LIFE_TIME &&
712                                    age >= ifa->ifa_preferred_lft &&
713                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
714                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
715                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
716                         }
717                 }
718                 rtnl_unlock();
719         }
720
721         next_sec = round_jiffies_up(next);
722         next_sched = next;
723
724         /* If rounded timeout is accurate enough, accept it. */
725         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
726                 next_sched = next_sec;
727
728         now = jiffies;
729         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
730         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
731                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
732
733         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
734                         next_sched - now);
735 }
736
737 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
738                              __u32 prefered_lft)
739 {
740         unsigned long timeout;
741
742         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
743
744         timeout = addrconf_timeout_fixup(valid_lft, HZ);
745         if (addrconf_finite_timeout(timeout))
746                 ifa->ifa_valid_lft = timeout;
747         else
748                 ifa->ifa_flags |= IFA_F_PERMANENT;
749
750         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
751         if (addrconf_finite_timeout(timeout)) {
752                 if (timeout == 0)
753                         ifa->ifa_flags |= IFA_F_DEPRECATED;
754                 ifa->ifa_preferred_lft = timeout;
755         }
756         ifa->ifa_tstamp = jiffies;
757         if (!ifa->ifa_cstamp)
758                 ifa->ifa_cstamp = ifa->ifa_tstamp;
759 }
760
761 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
762                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
763 {
764         struct nlattr *tb[IFA_MAX+1];
765         struct in_ifaddr *ifa;
766         struct ifaddrmsg *ifm;
767         struct net_device *dev;
768         struct in_device *in_dev;
769         int err;
770
771         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
772         if (err < 0)
773                 goto errout;
774
775         ifm = nlmsg_data(nlh);
776         err = -EINVAL;
777         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
778                 goto errout;
779
780         dev = __dev_get_by_index(net, ifm->ifa_index);
781         err = -ENODEV;
782         if (!dev)
783                 goto errout;
784
785         in_dev = __in_dev_get_rtnl(dev);
786         err = -ENOBUFS;
787         if (!in_dev)
788                 goto errout;
789
790         ifa = inet_alloc_ifa();
791         if (!ifa)
792                 /*
793                  * A potential indev allocation can be left alive, it stays
794                  * assigned to its device and is destroy with it.
795                  */
796                 goto errout;
797
798         ipv4_devconf_setall(in_dev);
799         neigh_parms_data_state_setall(in_dev->arp_parms);
800         in_dev_hold(in_dev);
801
802         if (!tb[IFA_ADDRESS])
803                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
804
805         INIT_HLIST_NODE(&ifa->hash);
806         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
807         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
808         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
809                                          ifm->ifa_flags;
810         ifa->ifa_scope = ifm->ifa_scope;
811         ifa->ifa_dev = in_dev;
812
813         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
814         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
815
816         if (tb[IFA_BROADCAST])
817                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
818
819         if (tb[IFA_LABEL])
820                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
821         else
822                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
823
824         if (tb[IFA_CACHEINFO]) {
825                 struct ifa_cacheinfo *ci;
826
827                 ci = nla_data(tb[IFA_CACHEINFO]);
828                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
829                         err = -EINVAL;
830                         goto errout_free;
831                 }
832                 *pvalid_lft = ci->ifa_valid;
833                 *pprefered_lft = ci->ifa_prefered;
834         }
835
836         return ifa;
837
838 errout_free:
839         inet_free_ifa(ifa);
840 errout:
841         return ERR_PTR(err);
842 }
843
844 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
845 {
846         struct in_device *in_dev = ifa->ifa_dev;
847         struct in_ifaddr *ifa1, **ifap;
848
849         if (!ifa->ifa_local)
850                 return NULL;
851
852         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
853              ifap = &ifa1->ifa_next) {
854                 if (ifa1->ifa_mask == ifa->ifa_mask &&
855                     inet_ifa_match(ifa1->ifa_address, ifa) &&
856                     ifa1->ifa_local == ifa->ifa_local)
857                         return ifa1;
858         }
859         return NULL;
860 }
861
862 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
863 {
864         struct net *net = sock_net(skb->sk);
865         struct in_ifaddr *ifa;
866         struct in_ifaddr *ifa_existing;
867         __u32 valid_lft = INFINITY_LIFE_TIME;
868         __u32 prefered_lft = INFINITY_LIFE_TIME;
869
870         ASSERT_RTNL();
871
872         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
873         if (IS_ERR(ifa))
874                 return PTR_ERR(ifa);
875
876         ifa_existing = find_matching_ifa(ifa);
877         if (!ifa_existing) {
878                 /* It would be best to check for !NLM_F_CREATE here but
879                  * userspace already relies on not having to provide this.
880                  */
881                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
882                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
883                         int ret = ip_mc_autojoin_config(net, true, ifa);
884
885                         if (ret < 0) {
886                                 inet_free_ifa(ifa);
887                                 return ret;
888                         }
889                 }
890                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
891         } else {
892                 inet_free_ifa(ifa);
893
894                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
895                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
896                         return -EEXIST;
897                 ifa = ifa_existing;
898                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
899                 cancel_delayed_work(&check_lifetime_work);
900                 queue_delayed_work(system_power_efficient_wq,
901                                 &check_lifetime_work, 0);
902                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
903         }
904         return 0;
905 }
906
907 /*
908  *      Determine a default network mask, based on the IP address.
909  */
910
911 static int inet_abc_len(__be32 addr)
912 {
913         int rc = -1;    /* Something else, probably a multicast. */
914
915         if (ipv4_is_zeronet(addr))
916                 rc = 0;
917         else {
918                 __u32 haddr = ntohl(addr);
919
920                 if (IN_CLASSA(haddr))
921                         rc = 8;
922                 else if (IN_CLASSB(haddr))
923                         rc = 16;
924                 else if (IN_CLASSC(haddr))
925                         rc = 24;
926         }
927
928         return rc;
929 }
930
931
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 * @net: network namespace used for device lookup and the capability check
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS;
 *       any other value is rejected with -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the get
 *       commands, written back with the requested address
 *
 * Returns 0 on success or a negative errno: -EFAULT when the ifreq cannot
 * be copied, -EPERM when a set command lacks CAP_NET_ADMIN, -EINVAL for an
 * unknown command or a bad address family/address/mask, -ENODEV when the
 * named device does not exist, -EADDRNOTAVAIL when no address matches the
 * label, -ENOBUFS when a new address cannot be allocated, or whatever
 * dev_change_flags()/inet_set_ifa() return.
 */
932 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
933 {
934         struct ifreq ifr;
935         struct sockaddr_in sin_orig;
936         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
937         struct in_device *in_dev;
938         struct in_ifaddr **ifap = NULL;
939         struct in_ifaddr *ifa = NULL;
940         struct net_device *dev;
941         char *colon;
942         int ret = -EFAULT;
943         int tryaddrmatch = 0;
944
945         /*
946          *      Fetch the caller's info block into kernel space
947          */
948
949         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
950                 goto out;
            /* Force termination: the name came straight from user space. */
951         ifr.ifr_name[IFNAMSIZ - 1] = 0;
952
953         /* save original address for comparison */
954         memcpy(&sin_orig, sin, sizeof(*sin));
955
            /* Temporarily cut off a ":alias" suffix so that dev_load() and
             * __dev_get_by_name() see the bare device name; the colon is
             * put back once the device has been found.
             */
956         colon = strchr(ifr.ifr_name, ':');
957         if (colon)
958                 *colon = 0;
959
960         dev_load(net, ifr.ifr_name);
961
            /* First pass over cmd: permission and argument checks only. */
962         switch (cmd) {
963         case SIOCGIFADDR:       /* Get interface address */
964         case SIOCGIFBRDADDR:    /* Get the broadcast address */
965         case SIOCGIFDSTADDR:    /* Get the destination address */
966         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
967                 /* Note that these ioctls will not sleep,
968                    so that we do not impose a lock.
969                    One day we will be forced to put shlock here (I mean SMP)
970                  */
971                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
                    /* The reply is built in place; start from a clean sockaddr. */
972                 memset(sin, 0, sizeof(*sin));
973                 sin->sin_family = AF_INET;
974                 break;
975
976         case SIOCSIFFLAGS:
977                 ret = -EPERM;
978                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
979                         goto out;
980                 break;
981         case SIOCSIFADDR:       /* Set interface address (and family) */
982         case SIOCSIFBRDADDR:    /* Set the broadcast address */
983         case SIOCSIFDSTADDR:    /* Set the destination address */
984         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
985                 ret = -EPERM;
986                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
987                         goto out;
988                 ret = -EINVAL;
989                 if (sin->sin_family != AF_INET)
990                         goto out;
991                 break;
992         default:
993                 ret = -EINVAL;
994                 goto out;
995         }
996
997         rtnl_lock();
998
999         ret = -ENODEV;
1000         dev = __dev_get_by_name(net, ifr.ifr_name);
1001         if (!dev)
1002                 goto done;
1003
             /* Restore the full "name:alias" label for the comparisons below. */
1004         if (colon)
1005                 *colon = ':';
1006
1007         in_dev = __in_dev_get_rtnl(dev);
1008         if (in_dev) {
1009                 if (tryaddrmatch) {
1010                         /* Matthias Andree */
1011                         /* compare label and address (4.4BSD style) */
1012                         /* note: we only do this for a limited set of ioctls
1013                            and only if the original address family was AF_INET.
1014                            This is checked above. */
1015                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1016                              ifap = &ifa->ifa_next) {
1017                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1018                                     sin_orig.sin_addr.s_addr ==
1019                                                         ifa->ifa_local) {
1020                                         break; /* found */
1021                                 }
1022                         }
1023                 }
1024                 /* we didn't get a match, maybe the application is
1025                    4.3BSD-style and passed in junk so we fall back to
1026                    comparing just the label */
1027                 if (!ifa) {
1028                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1029                              ifap = &ifa->ifa_next)
1030                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1031                                         break;
1032                 }
1033         }
1034
             /* Only SIOCSIFADDR (which can create an address) and
              * SIOCSIFFLAGS may continue without an existing address.
              */
1035         ret = -EADDRNOTAVAIL;
1036         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1037                 goto done;
1038
             /* Second pass over cmd: perform the operation.  From here on
              * ifap, when ifa is set, points at the list slot holding ifa.
              */
1039         switch (cmd) {
1040         case SIOCGIFADDR:       /* Get interface address */
1041                 sin->sin_addr.s_addr = ifa->ifa_local;
1042                 goto rarok;
1043
1044         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1045                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1046                 goto rarok;
1047
1048         case SIOCGIFDSTADDR:    /* Get the destination address */
1049                 sin->sin_addr.s_addr = ifa->ifa_address;
1050                 goto rarok;
1051
1052         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1053                 sin->sin_addr.s_addr = ifa->ifa_mask;
1054                 goto rarok;
1055
1056         case SIOCSIFFLAGS:
                     /* With an alias label the request targets that one
                      * address: clearing IFF_UP deletes it, anything else
                      * is a successful no-op.  Without an alias the flags
                      * go to the device itself.
                      */
1057                 if (colon) {
1058                         ret = -EADDRNOTAVAIL;
1059                         if (!ifa)
1060                                 break;
1061                         ret = 0;
1062                         if (!(ifr.ifr_flags & IFF_UP))
1063                                 inet_del_ifa(in_dev, ifap, 1);
1064                         break;
1065                 }
1066                 ret = dev_change_flags(dev, ifr.ifr_flags);
1067                 break;
1068
1069         case SIOCSIFADDR:       /* Set interface address (and family) */
1070                 ret = -EINVAL;
1071                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1072                         break;
1073
1074                 if (!ifa) {
                             /* No address under this label yet: create one. */
1075                         ret = -ENOBUFS;
1076                         ifa = inet_alloc_ifa();
1077                         if (!ifa)
1078                                 break;
1079                         INIT_HLIST_NODE(&ifa->hash);
1080                         if (colon)
1081                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1082                         else
1083                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084                 } else {
                             /* Unchanged address is a successful no-op;
                              * otherwise take the entry off the list so it
                              * can be re-added with the new value (third
                              * argument 0 - presumably "do not destroy";
                              * confirm against inet_del_ifa()).
                              */
1085                         ret = 0;
1086                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1087                                 break;
1088                         inet_del_ifa(in_dev, ifap, 0);
1089                         ifa->ifa_broadcast = 0;
1090                         ifa->ifa_scope = 0;
1091                 }
1092
1093                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1094
                     /* Non point-to-point devices get the classful default
                      * prefix (and a derived broadcast address when the
                      * device is IFF_BROADCAST and the prefix is shorter
                      * than 31); point-to-point devices get a /32.
                      */
1095                 if (!(dev->flags & IFF_POINTOPOINT)) {
1096                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1097                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1098                         if ((dev->flags & IFF_BROADCAST) &&
1099                             ifa->ifa_prefixlen < 31)
1100                                 ifa->ifa_broadcast = ifa->ifa_address |
1101                                                      ~ifa->ifa_mask;
1102                 } else {
1103                         ifa->ifa_prefixlen = 32;
1104                         ifa->ifa_mask = inet_make_mask(32);
1105                 }
1106                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1107                 ret = inet_set_ifa(dev, ifa);
1108                 break;
1109
1110         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1111                 ret = 0;
1112                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1113                         inet_del_ifa(in_dev, ifap, 0);
1114                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1115                         inet_insert_ifa(ifa);
1116                 }
1117                 break;
1118
1119         case SIOCSIFDSTADDR:    /* Set the destination address */
1120                 ret = 0;
1121                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1122                         break;
1123                 ret = -EINVAL;
1124                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1125                         break;
1126                 ret = 0;
1127                 inet_del_ifa(in_dev, ifap, 0);
1128                 ifa->ifa_address = sin->sin_addr.s_addr;
1129                 inet_insert_ifa(ifa);
1130                 break;
1131
1132         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1133
1134                 /*
1135                  *      The mask we set must be legal.
1136                  */
1137                 ret = -EINVAL;
1138                 if (bad_mask(sin->sin_addr.s_addr, 0))
1139                         break;
1140                 ret = 0;
1141                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1142                         __be32 old_mask = ifa->ifa_mask;
1143                         inet_del_ifa(in_dev, ifap, 0);
1144                         ifa->ifa_mask = sin->sin_addr.s_addr;
1145                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1146
1147                         /* See if current broadcast address matches
1148                          * with current netmask, then recalculate
1149                          * the broadcast address. Otherwise it's a
1150                          * funny address, so don't touch it since
1151                          * the user seems to know what (s)he's doing...
1152                          */
1153                         if ((dev->flags & IFF_BROADCAST) &&
1154                             (ifa->ifa_prefixlen < 31) &&
1155                             (ifa->ifa_broadcast ==
1156                              (ifa->ifa_local|~old_mask))) {
1157                                 ifa->ifa_broadcast = (ifa->ifa_local |
1158                                                       ~sin->sin_addr.s_addr);
1159                         }
1160                         inet_insert_ifa(ifa);
1161                 }
1162                 break;
1163         }
1164 done:
1165         rtnl_unlock();
1166 out:
1167         return ret;
     /* Get commands land here: drop RTNL first, then copy the whole ifreq
      * (name plus the filled-in sockaddr) back to the caller.
      */
1168 rarok:
1169         rtnl_unlock();
1170         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1171         goto out;
1172 }
1173
1174 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1175 {
1176         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1177         struct in_ifaddr *ifa;
1178         struct ifreq ifr;
1179         int done = 0;
1180
1181         if (!in_dev)
1182                 goto out;
1183
1184         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1185                 if (!buf) {
1186                         done += sizeof(ifr);
1187                         continue;
1188                 }
1189                 if (len < (int) sizeof(ifr))
1190                         break;
1191                 memset(&ifr, 0, sizeof(struct ifreq));
1192                 strcpy(ifr.ifr_name, ifa->ifa_label);
1193
1194                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1195                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1196                                                                 ifa->ifa_local;
1197
1198                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1199                         done = -EFAULT;
1200                         break;
1201                 }
1202                 buf  += sizeof(struct ifreq);
1203                 len  -= sizeof(struct ifreq);
1204                 done += sizeof(struct ifreq);
1205         }
1206 out:
1207         return done;
1208 }
1209
/*
 * inet_select_addr - choose a local IPv4 address to use on @dev.
 * @dev:   device whose addresses are tried first
 * @dst:   destination used to prefer an address in the same subnet;
 *         0 means take the first address that passes the scope check
 * @scope: largest ifa_scope value that is acceptable
 *
 * Returns the selected address, or 0 when nothing suitable was found.
 * The whole search runs between rcu_read_lock() and rcu_read_unlock().
 *
 * NOTE(review): for_primary_ifa()/endfor_ifa() are macros defined outside
 * this chunk; they supply the 'ifa' iterator used in the loop bodies.
 */
1210 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1211 {
1212         __be32 addr = 0;
1213         struct in_device *in_dev;
1214         struct net *net = dev_net(dev);
1215
1216         rcu_read_lock();
1217         in_dev = __in_dev_get_rcu(dev);
1218         if (!in_dev)
1219                 goto no_in_dev;
1220
1221         for_primary_ifa(in_dev) {
1222                 if (ifa->ifa_scope > scope)
1223                         continue;
                     /* Best case: no dst constraint, or same subnet as dst. */
1224                 if (!dst || inet_ifa_match(dst, ifa)) {
1225                         addr = ifa->ifa_local;
1226                         break;
1227                 }
                     /* Otherwise remember the first in-scope address as a
                      * fallback and keep looking for a subnet match.
                      */
1228                 if (!addr)
1229                         addr = ifa->ifa_local;
1230         } endfor_ifa(in_dev);
1231
1232         if (addr)
1233                 goto out_unlock;
1234 no_in_dev:
1235
1236         /* Not loopback addresses on loopback should be preferred
1237            in this case. It is important that lo is the first interface
1238            in dev_base list.
1239          */
             /* Nothing usable on @dev: scan every device in the namespace
              * (reusing 'dev' as the iterator) and take the first address
              * that is in scope and not link-scoped.
              */
1240         for_each_netdev_rcu(net, dev) {
1241                 in_dev = __in_dev_get_rcu(dev);
1242                 if (!in_dev)
1243                         continue;
1244
1245                 for_primary_ifa(in_dev) {
1246                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1247                             ifa->ifa_scope <= scope) {
1248                                 addr = ifa->ifa_local;
1249                                 goto out_unlock;
1250                         }
1251                 } endfor_ifa(in_dev);
1252         }
1253 out_unlock:
1254         rcu_read_unlock();
1255         return addr;
1256 }
1257 EXPORT_SYMBOL(inet_select_addr);
1258
/*
 * confirm_addr_indev - per-device worker for inet_confirm_addr().
 * @in_dev: device whose address list is searched
 * @dst:    required destination subnet, 0 = any
 * @local:  required local address, 0 = pick one
 * @scope:  largest acceptable ifa_scope for the returned address
 *
 * Two things are tracked while walking the list:
 *   addr - the candidate local address (first entry that equals @local, or
 *          any entry when @local is 0, and whose scope is within @scope);
 *   same - whether some entry's subnet contains both @local and @dst
 *          (each test is skipped when the corresponding value is 0).
 *
 * Returns addr when a subnet match was found, 0 otherwise.
 *
 * NOTE(review): for_ifa()/endfor_ifa() are macros defined outside this
 * chunk; they supply the 'ifa' iterator used in the loop body.
 */
1259 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1260                               __be32 local, int scope)
1261 {
1262         int same = 0;
1263         __be32 addr = 0;
1264
1265         for_ifa(in_dev) {
1266                 if (!addr &&
1267                     (local == ifa->ifa_local || !local) &&
1268                     ifa->ifa_scope <= scope) {
1269                         addr = ifa->ifa_local;
                             /* Subnet already confirmed earlier: done. */
1270                         if (same)
1271                                 break;
1272                 }
1273                 if (!same) {
1274                         same = (!local || inet_ifa_match(local, ifa)) &&
1275                                 (!dst || inet_ifa_match(dst, ifa));
1276                         if (same && addr) {
                                     /* With an explicit local address, or
                                      * with no dst to satisfy, the current
                                      * candidate is final.
                                      */
1277                                 if (local || !dst)
1278                                         break;
1279                                 /* Is the selected addr into dst subnet? */
1280                                 if (inet_ifa_match(addr, ifa))
1281                                         break;
1282                                 /* No, then can we use new local src? */
1283                                 if (ifa->ifa_scope <= scope) {
1284                                         addr = ifa->ifa_local;
1285                                         break;
1286                                 }
1287                                 /* search for large dst subnet for addr */
1288                                 same = 0;
1289                         }
1290                 }
1291         } endfor_ifa(in_dev);
1292
1293         return same ? addr : 0;
1294 }
1295
1296 /*
1297  * Confirm that local IP address exists using wildcards:
1298  * - net: netns to check, cannot be NULL
1299  * - in_dev: only on this interface, NULL=any interface
1300  * - dst: only in the same subnet as dst, 0=any dst
1301  * - local: address, 0=autoselect the local address
1302  * - scope: maximum allowed scope value for the local address
1303  */
1304 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1305                          __be32 dst, __be32 local, int scope)
1306 {
1307         __be32 addr = 0;
1308         struct net_device *dev;
1309
1310         if (in_dev)
1311                 return confirm_addr_indev(in_dev, dst, local, scope);
1312
1313         rcu_read_lock();
1314         for_each_netdev_rcu(net, dev) {
1315                 in_dev = __in_dev_get_rcu(dev);
1316                 if (in_dev) {
1317                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1318                         if (addr)
1319                                 break;
1320                 }
1321         }
1322         rcu_read_unlock();
1323
1324         return addr;
1325 }
1326 EXPORT_SYMBOL(inet_confirm_addr);
1327
1328 /*
1329  *      Device notifier
1330  */
1331
/* Add @nb to the inetaddr_chain blocking notifier chain.  Returns whatever
 * blocking_notifier_chain_register() returns.
 */
1332 int register_inetaddr_notifier(struct notifier_block *nb)
1333 {
1334         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1335 }
1336 EXPORT_SYMBOL(register_inetaddr_notifier);
1337
/* Remove @nb from the inetaddr_chain blocking notifier chain.  Returns
 * whatever blocking_notifier_chain_unregister() returns.
 */
1338 int unregister_inetaddr_notifier(struct notifier_block *nb)
1339 {
1340         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1341 }
1342 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1343
1344 /* Rename ifa_labels for a device name change. Make some effort to preserve
1345  * existing alias numbering and to create unique labels if possible.
1346 */
/* @dev:    device that was renamed; dev->name already holds the new name
 * @in_dev: IPv4 state of @dev whose address labels are rewritten
 *
 * The first address simply takes the new device name.  Every later address
 * gets the new name plus a ":suffix": the suffix of its old label when it
 * had one, otherwise ":<n>" where n is the address's 1-based position in
 * the list.  An RTM_NEWADDR message is sent for each address.
 */
1347 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1348 {
1349         struct in_ifaddr *ifa;
1350         int named = 0;
1351
1352         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1353                 char old[IFNAMSIZ], *dot;
1354
                     /* Keep the old label around so its alias suffix can be
                      * reused after the label is overwritten.
                      */
1355                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1356                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1357                 if (named++ == 0)
1358                         goto skip;
1359                 dot = strchr(old, ':');
1360                 if (!dot) {
                             /* No alias before: synthesize one in the scratch
                              * buffer ('named' was already incremented, so the
                              * second address gets ":2").
                              */
1361                         sprintf(old, ":%d", named);
1362                         dot = old;
1363                 }
                     /* Append the suffix if name + suffix fit in IFNAMSIZ;
                      * otherwise overwrite the tail of the name so that the
                      * suffix and its terminator end exactly at the buffer end.
                      */
1364                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1365                         strcat(ifa->ifa_label, dot);
1366                 else
1367                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1368 skip:
1369                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1370         }
1371 }
1372
1373 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1374                                         struct in_device *in_dev)
1375
1376 {
1377         struct in_ifaddr *ifa;
1378
1379         for (ifa = in_dev->ifa_list; ifa;
1380              ifa = ifa->ifa_next) {
1381                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1382                          ifa->ifa_local, dev,
1383                          ifa->ifa_local, NULL,
1384                          dev->dev_addr, NULL);
1385         }
1386 }
1387
1388 /* Called only under RTNL semaphore */
1389
/*
 * inetdev_event - netdevice notifier callback (see ip_netdev_notifier).
 * @this:  the notifier block; not used here
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the affected net_device is extracted
 *
 * Keeps the per-device IPv4 state (in_device) in step with device events.
 * Returns NOTIFY_DONE, except when inetdev_init() fails while handling
 * NETDEV_REGISTER, in which case the error is passed through
 * notifier_from_errno().
 */
1390 static int inetdev_event(struct notifier_block *this, unsigned long event,
1391                          void *ptr)
1392 {
1393         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1394         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1395
1396         ASSERT_RTNL();
1397
             /* No IPv4 state yet: only REGISTER and CHANGEMTU (to a valid
              * MTU) create it; every other event is ignored.
              */
1398         if (!in_dev) {
1399                 if (event == NETDEV_REGISTER) {
1400                         in_dev = inetdev_init(dev);
1401                         if (IS_ERR(in_dev))
1402                                 return notifier_from_errno(PTR_ERR(in_dev));
1403                         if (dev->flags & IFF_LOOPBACK) {
1404                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1405                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1406                         }
1407                 } else if (event == NETDEV_CHANGEMTU) {
1408                         /* Re-enabling IP */
1409                         if (inetdev_valid_mtu(dev->mtu))
1410                                 in_dev = inetdev_init(dev);
1411                 }
1412                 goto out;
1413         }
1414
1415         switch (event) {
1416         case NETDEV_REGISTER:
                     /* REGISTER with IPv4 state already attached is
                      * unexpected; the pointer is cleared.
                      */
1417                 pr_debug("%s: bug\n", __func__);
1418                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1419                 break;
1420         case NETDEV_UP:
1421                 if (!inetdev_valid_mtu(dev->mtu))
1422                         break;
                     /* Loopback devices get 127.0.0.1/8 (host scope)
                      * configured automatically when they come up.
                      */
1423                 if (dev->flags & IFF_LOOPBACK) {
1424                         struct in_ifaddr *ifa = inet_alloc_ifa();
1425
1426                         if (ifa) {
1427                                 INIT_HLIST_NODE(&ifa->hash);
1428                                 ifa->ifa_local =
1429                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1430                                 ifa->ifa_prefixlen = 8;
1431                                 ifa->ifa_mask = inet_make_mask(8);
1432                                 in_dev_hold(in_dev);
1433                                 ifa->ifa_dev = in_dev;
1434                                 ifa->ifa_scope = RT_SCOPE_HOST;
1435                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1436                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1437                                                  INFINITY_LIFE_TIME);
1438                                 ipv4_devconf_setall(in_dev);
1439                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1440                                 inet_insert_ifa(ifa);
1441                         }
1442                 }
1443                 ip_mc_up(in_dev);
1444                 /* fall through */
1445         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce themselves via ARP only
                      * when IN_DEV_ARP_NOTIFY() is true for the device.
                      */
1446                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1447                         break;
1448                 /* fall through */
1449         case NETDEV_NOTIFY_PEERS:
1450                 /* Send gratuitous ARP to notify of link change */
1451                 inetdev_send_gratuitous_arp(dev, in_dev);
1452                 break;
1453         case NETDEV_DOWN:
1454                 ip_mc_down(in_dev);
1455                 break;
1456         case NETDEV_PRE_TYPE_CHANGE:
1457                 ip_mc_unmap(in_dev);
1458                 break;
1459         case NETDEV_POST_TYPE_CHANGE:
1460                 ip_mc_remap(in_dev);
1461                 break;
1462         case NETDEV_CHANGEMTU:
1463                 if (inetdev_valid_mtu(dev->mtu))
1464                         break;
1465                 /* disable IP when MTU is not enough */
                     /* fall through */
1466         case NETDEV_UNREGISTER:
1467                 inetdev_destroy(in_dev);
1468                 break;
1469         case NETDEV_CHANGENAME:
1470                 /* Do not notify about label change, this event is
1471                  * not interesting to applications using netlink.
1472                  */
1473                 inetdev_changename(dev, in_dev);
1474
                     /* Re-register the sysctl entries after the rename. */
1475                 devinet_sysctl_unregister(in_dev);
1476                 devinet_sysctl_register(in_dev);
1477                 break;
1478         }
1479 out:
1480         return NOTIFY_DONE;
1481 }
1482
/* Notifier block whose callback is inetdev_event(). */
1483 static struct notifier_block ip_netdev_notifier = {
1484         .notifier_call = inetdev_event,
1485 };
1486
1487 static size_t inet_nlmsg_size(void)
1488 {
1489         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1490                + nla_total_size(4) /* IFA_ADDRESS */
1491                + nla_total_size(4) /* IFA_LOCAL */
1492                + nla_total_size(4) /* IFA_BROADCAST */
1493                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1494                + nla_total_size(4)  /* IFA_FLAGS */
1495                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1496 }
1497
1498 static inline u32 cstamp_delta(unsigned long cstamp)
1499 {
1500         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1501 }
1502
1503 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1504                          unsigned long tstamp, u32 preferred, u32 valid)
1505 {
1506         struct ifa_cacheinfo ci;
1507
1508         ci.cstamp = cstamp_delta(cstamp);
1509         ci.tstamp = cstamp_delta(tstamp);
1510         ci.ifa_prefered = preferred;
1511         ci.ifa_valid = valid;
1512
1513         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1514 }
1515
/*
 * inet_fill_ifaddr - serialize one address into a netlink message.
 * @skb:    buffer the message is appended to
 * @ifa:    address to describe
 * @portid: netlink port id placed in the message header
 * @seq:    sequence number placed in the message header
 * @event:  message type (the callers in this file pass RTM_NEWADDR or
 *          forward their own event argument)
 * @flags:  netlink message flags
 *
 * Returns 0 on success.  Returns -EMSGSIZE when the header or any
 * attribute does not fit; a partially built message is cancelled first.
 */
1516 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1517                             u32 portid, u32 seq, int event, unsigned int flags)
1518 {
1519         struct ifaddrmsg *ifm;
1520         struct nlmsghdr  *nlh;
1521         u32 preferred, valid;
1522
1523         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1524         if (!nlh)
1525                 return -EMSGSIZE;
1526
1527         ifm = nlmsg_data(nlh);
1528         ifm->ifa_family = AF_INET;
1529         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1530         ifm->ifa_flags = ifa->ifa_flags;
1531         ifm->ifa_scope = ifa->ifa_scope;
1532         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1533
             /* Report remaining lifetimes: for non-permanent addresses with
              * a finite preferred lifetime, subtract the seconds elapsed
              * since ifa_tstamp (clamping at 0).  The valid lifetime is only
              * adjusted inside that same branch.
              */
1534         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1535                 preferred = ifa->ifa_preferred_lft;
1536                 valid = ifa->ifa_valid_lft;
1537                 if (preferred != INFINITY_LIFE_TIME) {
1538                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1539
1540                         if (preferred > tval)
1541                                 preferred -= tval;
1542                         else
1543                                 preferred = 0;
1544                         if (valid != INFINITY_LIFE_TIME) {
1545                                 if (valid > tval)
1546                                         valid -= tval;
1547                                 else
1548                                         valid = 0;
1549                         }
1550                 }
1551         } else {
1552                 preferred = INFINITY_LIFE_TIME;
1553                 valid = INFINITY_LIFE_TIME;
1554         }
             /* Address, local, broadcast and label attributes are emitted
              * only when set (non-zero / non-empty); flags and cacheinfo
              * are always emitted.  The first failing put aborts the chain.
              */
1555         if ((ifa->ifa_address &&
1556              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1557             (ifa->ifa_local &&
1558              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1559             (ifa->ifa_broadcast &&
1560              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1561             (ifa->ifa_label[0] &&
1562              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1563             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1564             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1565                           preferred, valid))
1566                 goto nla_put_failure;
1567
1568         nlmsg_end(skb, nlh);
1569         return 0;
1570
1571 nla_put_failure:
1572         nlmsg_cancel(skb, nlh);
1573         return -EMSGSIZE;
1574 }
1575
/*
 * inet_dump_ifaddr - netlink dump callback listing IPv4 addresses.
 * @skb: buffer to fill with RTM_NEWADDR (NLM_F_MULTI) messages
 * @cb:  dump state; cb->args[0..2] hold the resume position as
 *       (device hash bucket, device index within the bucket,
 *        address index within the device)
 *
 * Walks every device hash bucket of the namespace under rcu_read_lock()
 * and emits one message per address.  When inet_fill_ifaddr() fails, the
 * walk stops and the current position is saved so a later call can resume
 * from it.  Returns skb->len.
 */
1576 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1577 {
1578         struct net *net = sock_net(skb->sk);
1579         int h, s_h;
1580         int idx, s_idx;
1581         int ip_idx, s_ip_idx;
1582         struct net_device *dev;
1583         struct in_device *in_dev;
1584         struct in_ifaddr *ifa;
1585         struct hlist_head *head;
1586
             /* s_* are the saved start positions from the previous call. */
1587         s_h = cb->args[0];
1588         s_idx = idx = cb->args[1];
1589         s_ip_idx = ip_idx = cb->args[2];
1590
             /* s_idx only applies to the first bucket visited; it is reset
              * to 0 when moving on to the next one.
              */
1591         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1592                 idx = 0;
1593                 head = &net->dev_index_head[h];
1594                 rcu_read_lock();
                     /* Sequence value used by nl_dump_check_consistent()
                      * below.
                      */
1595                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1596                           net->dev_base_seq;
1597                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1598                         if (idx < s_idx)
1599                                 goto cont;
                             /* Past the device we stopped at last time:
                              * start from its first address.
                              */
1600                         if (h > s_h || idx > s_idx)
1601                                 s_ip_idx = 0;
1602                         in_dev = __in_dev_get_rcu(dev);
1603                         if (!in_dev)
1604                                 goto cont;
1605
1606                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1607                              ifa = ifa->ifa_next, ip_idx++) {
1608                                 if (ip_idx < s_ip_idx)
1609                                         continue;
1610                                 if (inet_fill_ifaddr(skb, ifa,
1611                                              NETLINK_CB(cb->skb).portid,
1612                                              cb->nlh->nlmsg_seq,
1613                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1614                                         rcu_read_unlock();
1615                                         goto done;
1616                                 }
1617                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1618                         }
1619 cont:
1620                         idx++;
1621                 }
1622                 rcu_read_unlock();
1623         }
1624
1625 done:
             /* Save the cursor for the next invocation. */
1626         cb->args[0] = h;
1627         cb->args[1] = idx;
1628         cb->args[2] = ip_idx;
1629
1630         return skb->len;
1631 }
1632
1633 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1634                       u32 portid)
1635 {
1636         struct sk_buff *skb;
1637         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1638         int err = -ENOBUFS;
1639         struct net *net;
1640
1641         net = dev_net(ifa->ifa_dev->dev);
1642         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1643         if (!skb)
1644                 goto errout;
1645
1646         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1647         if (err < 0) {
1648                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1649                 WARN_ON(err == -EMSGSIZE);
1650                 kfree_skb(skb);
1651                 goto errout;
1652         }
1653         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1654         return;
1655 errout:
1656         if (err < 0)
1657                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1658 }
1659
1660 static size_t inet_get_link_af_size(const struct net_device *dev,
1661                                     u32 ext_filter_mask)
1662 {
1663         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1664
1665         if (!in_dev)
1666                 return 0;
1667
1668         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1669 }
1670
1671 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1672                              u32 ext_filter_mask)
1673 {
1674         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1675         struct nlattr *nla;
1676         int i;
1677
1678         if (!in_dev)
1679                 return -ENODATA;
1680
1681         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1682         if (!nla)
1683                 return -EMSGSIZE;
1684
1685         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1686                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1687
1688         return 0;
1689 }
1690
/*
 * Netlink policy used by inet_validate_link_af() when parsing the nested
 * AF_INET attributes: IFLA_INET_CONF must itself be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1694
1695 static int inet_validate_link_af(const struct net_device *dev,
1696                                  const struct nlattr *nla)
1697 {
1698         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1699         int err, rem;
1700
1701         if (dev && !__in_dev_get_rtnl(dev))
1702                 return -EAFNOSUPPORT;
1703
1704         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1705         if (err < 0)
1706                 return err;
1707
1708         if (tb[IFLA_INET_CONF]) {
1709                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1710                         int cfgid = nla_type(a);
1711
1712                         if (nla_len(a) < 4)
1713                                 return -EINVAL;
1714
1715                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1716                                 return -EINVAL;
1717                 }
1718         }
1719
1720         return 0;
1721 }
1722
1723 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1724 {
1725         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1726         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1727         int rem;
1728
1729         if (!in_dev)
1730                 return -EAFNOSUPPORT;
1731
1732         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1733                 BUG();
1734
1735         if (tb[IFLA_INET_CONF]) {
1736                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1737                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1738         }
1739
1740         return 0;
1741 }
1742
1743 static int inet_netconf_msgsize_devconf(int type)
1744 {
1745         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1746                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1747
1748         /* type -1 is used for ALL */
1749         if (type == -1 || type == NETCONFA_FORWARDING)
1750                 size += nla_total_size(4);
1751         if (type == -1 || type == NETCONFA_RP_FILTER)
1752                 size += nla_total_size(4);
1753         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1754                 size += nla_total_size(4);
1755         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1756                 size += nla_total_size(4);
1757         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1758                 size += nla_total_size(4);
1759
1760         return size;
1761 }
1762
1763 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1764                                      struct ipv4_devconf *devconf, u32 portid,
1765                                      u32 seq, int event, unsigned int flags,
1766                                      int type)
1767 {
1768         struct nlmsghdr  *nlh;
1769         struct netconfmsg *ncm;
1770
1771         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1772                         flags);
1773         if (!nlh)
1774                 return -EMSGSIZE;
1775
1776         ncm = nlmsg_data(nlh);
1777         ncm->ncm_family = AF_INET;
1778
1779         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1780                 goto nla_put_failure;
1781
1782         /* type -1 is used for ALL */
1783         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1784             nla_put_s32(skb, NETCONFA_FORWARDING,
1785                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1786                 goto nla_put_failure;
1787         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1788             nla_put_s32(skb, NETCONFA_RP_FILTER,
1789                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1790                 goto nla_put_failure;
1791         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1792             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1793                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1794                 goto nla_put_failure;
1795         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1796             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1797                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1798                 goto nla_put_failure;
1799         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1800             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1801                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1802                 goto nla_put_failure;
1803
1804         nlmsg_end(skb, nlh);
1805         return 0;
1806
1807 nla_put_failure:
1808         nlmsg_cancel(skb, nlh);
1809         return -EMSGSIZE;
1810 }
1811
1812 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1813                                  struct ipv4_devconf *devconf)
1814 {
1815         struct sk_buff *skb;
1816         int err = -ENOBUFS;
1817
1818         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1819         if (!skb)
1820                 goto errout;
1821
1822         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1823                                         RTM_NEWNETCONF, 0, type);
1824         if (err < 0) {
1825                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1826                 WARN_ON(err == -EMSGSIZE);
1827                 kfree_skb(skb);
1828                 goto errout;
1829         }
1830         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1831         return;
1832 errout:
1833         if (err < 0)
1834                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1835 }
1836
/*
 * Netlink policy used by inet_netconf_get_devconf() to parse requests.
 * Each listed attribute is declared with a length of sizeof(int).
 * NOTE(review): NETCONFA_MC_FORWARDING has no entry here although it is
 * emitted by inet_netconf_fill_devconf() - confirm this is intentional.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
        [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
        [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
};
1844
1845 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1846                                     struct nlmsghdr *nlh)
1847 {
1848         struct net *net = sock_net(in_skb->sk);
1849         struct nlattr *tb[NETCONFA_MAX+1];
1850         struct netconfmsg *ncm;
1851         struct sk_buff *skb;
1852         struct ipv4_devconf *devconf;
1853         struct in_device *in_dev;
1854         struct net_device *dev;
1855         int ifindex;
1856         int err;
1857
1858         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1859                           devconf_ipv4_policy);
1860         if (err < 0)
1861                 goto errout;
1862
1863         err = -EINVAL;
1864         if (!tb[NETCONFA_IFINDEX])
1865                 goto errout;
1866
1867         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1868         switch (ifindex) {
1869         case NETCONFA_IFINDEX_ALL:
1870                 devconf = net->ipv4.devconf_all;
1871                 break;
1872         case NETCONFA_IFINDEX_DEFAULT:
1873                 devconf = net->ipv4.devconf_dflt;
1874                 break;
1875         default:
1876                 dev = __dev_get_by_index(net, ifindex);
1877                 if (!dev)
1878                         goto errout;
1879                 in_dev = __in_dev_get_rtnl(dev);
1880                 if (!in_dev)
1881                         goto errout;
1882                 devconf = &in_dev->cnf;
1883                 break;
1884         }
1885
1886         err = -ENOBUFS;
1887         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_KERNEL);
1888         if (!skb)
1889                 goto errout;
1890
1891         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1892                                         NETLINK_CB(in_skb).portid,
1893                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1894                                         -1);
1895         if (err < 0) {
1896                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1897                 WARN_ON(err == -EMSGSIZE);
1898                 kfree_skb(skb);
1899                 goto errout;
1900         }
1901         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1902 errout:
1903         return err;
1904 }
1905
/*
 * Netlink dump callback: emit one RTM_NEWNETCONF message per device that
 * has an in_device, followed by the namespace's "all" and "default"
 * configurations.
 *
 * The dump is resumable.  cb->args[0] holds the hash bucket to continue
 * from and cb->args[1] the position inside that bucket.  Bucket values
 * NETDEV_HASHENTRIES and NETDEV_HASHENTRIES + 1 stand for the "all" and
 * "default" pseudo entries respectively.
 *
 * Returns skb->len.
 */
static int inet_netconf_dump_devconf(struct sk_buff *skb,
                                     struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct hlist_head *head;

        /* Restore the position saved by the previous invocation. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];

        /* s_idx only applies to the first bucket visited; reset afterwards. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                /*
                 * Value checked by nl_dump_check_consistent() below;
                 * presumably lets a dump interrupted by a change be flagged.
                 */
                cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
                          net->dev_base_seq;
                hlist_for_each_entry_rcu(dev, head, index_hlist) {
                        /* Skip entries already sent in an earlier pass. */
                        if (idx < s_idx)
                                goto cont;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        /* Fill failed: stop here and resume from (h, idx). */
                        if (inet_netconf_fill_devconf(skb, dev->ifindex,
                                                      &in_dev->cnf,
                                                      NETLINK_CB(cb->skb).portid,
                                                      cb->nlh->nlmsg_seq,
                                                      RTM_NEWNETCONF,
                                                      NLM_F_MULTI,
                                                      -1) < 0) {
                                rcu_read_unlock();
                                goto done;
                        }
                        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
                        idx++;
                }
                rcu_read_unlock();
        }
        /* All device buckets done: emit the "all" configuration. */
        if (h == NETDEV_HASHENTRIES) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
                                              net->ipv4.devconf_all,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              -1) < 0)
                        goto done;
                else
                        h++;
        }
        /* Then the "default" configuration. */
        if (h == NETDEV_HASHENTRIES + 1) {
                if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
                                              net->ipv4.devconf_dflt,
                                              NETLINK_CB(cb->skb).portid,
                                              cb->nlh->nlmsg_seq,
                                              RTM_NEWNETCONF, NLM_F_MULTI,
                                              -1) < 0)
                        goto done;
                else
                        h++;
        }
done:
        /* Save the position for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;

        return skb->len;
}
1976
1977 #ifdef CONFIG_SYSCTL
1978
1979 static void devinet_copy_dflt_conf(struct net *net, int i)
1980 {
1981         struct net_device *dev;
1982
1983         rcu_read_lock();
1984         for_each_netdev_rcu(net, dev) {
1985                 struct in_device *in_dev;
1986
1987                 in_dev = __in_dev_get_rcu(dev);
1988                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1989                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1990         }
1991         rcu_read_unlock();
1992 }
1993
1994 /* called with RTNL locked */
1995 static void inet_forward_change(struct net *net)
1996 {
1997         struct net_device *dev;
1998         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1999
2000         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2001         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2002         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2003                                     NETCONFA_IFINDEX_ALL,
2004                                     net->ipv4.devconf_all);
2005         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2006                                     NETCONFA_IFINDEX_DEFAULT,
2007                                     net->ipv4.devconf_dflt);
2008
2009         for_each_netdev(net, dev) {
2010                 struct in_device *in_dev;
2011
2012                 if (on)
2013                         dev_disable_lro(dev);
2014
2015                 in_dev = __in_dev_get_rtnl(dev);
2016                 if (in_dev) {
2017                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2018                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2019                                                     dev->ifindex, &in_dev->cnf);
2020                 }
2021         }
2022 }
2023
2024 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2025 {
2026         if (cnf == net->ipv4.devconf_dflt)
2027                 return NETCONFA_IFINDEX_DEFAULT;
2028         else if (cnf == net->ipv4.devconf_all)
2029                 return NETCONFA_IFINDEX_ALL;
2030         else {
2031                 struct in_device *idev
2032                         = container_of(cnf, struct in_device, cnf);
2033                 return idev->dev->ifindex;
2034         }
2035 }
2036
2037 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2038                              void __user *buffer,
2039                              size_t *lenp, loff_t *ppos)
2040 {
2041         int old_value = *(int *)ctl->data;
2042         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2043         int new_value = *(int *)ctl->data;
2044
2045         if (write) {
2046                 struct ipv4_devconf *cnf = ctl->extra1;
2047                 struct net *net = ctl->extra2;
2048                 int i = (int *)ctl->data - cnf->data;
2049                 int ifindex;
2050
2051                 set_bit(i, cnf->state);
2052
2053                 if (cnf == net->ipv4.devconf_dflt)
2054                         devinet_copy_dflt_conf(net, i);
2055                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2056                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2057                         if ((new_value == 0) && (old_value != 0))
2058                                 rt_cache_flush(net);
2059
2060                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2061                     new_value != old_value) {
2062                         ifindex = devinet_conf_ifindex(net, cnf);
2063                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2064                                                     ifindex, cnf);
2065                 }
2066                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2067                     new_value != old_value) {
2068                         ifindex = devinet_conf_ifindex(net, cnf);
2069                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2070                                                     ifindex, cnf);
2071                 }
2072                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2073                     new_value != old_value) {
2074                         ifindex = devinet_conf_ifindex(net, cnf);
2075                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2076                                                     ifindex, cnf);
2077                 }
2078         }
2079
2080         return ret;
2081 }
2082
/*
 * sysctl handler for the "forwarding" entries (and net/ipv4/ip_forward).
 *
 * After proc_dointvec() has stored the value, a write that changed it is
 * propagated:
 *  - for the "default" block only a netconf notification is sent;
 *  - for the "all" block inet_forward_change() updates every device;
 *  - for a per-device block LRO is disabled when forwarding was enabled
 *    and a notification is sent for that device.
 * The latter two run under RTNL and are followed by a route cache flush.
 *
 * Returns the proc_dointvec() result, or the restart_syscall() result
 * when RTNL could not be taken.
 */
static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
                                  void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
{
        int *valp = ctl->data;
        /* Remember value and file position so a restart can undo the write. */
        int val = *valp;
        loff_t pos = *ppos;
        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

        if (write && *valp != val) {
                struct net *net = ctl->extra2;

                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
                        /* Do not block on RTNL; retry the whole syscall. */
                        if (!rtnl_trylock()) {
                                /* Restore the original values before restarting */
                                *valp = val;
                                *ppos = pos;
                                return restart_syscall();
                        }
                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
                                inet_forward_change(net);
                        } else {
                                /* Per-device entry: cnf is embedded in its in_device. */
                                struct ipv4_devconf *cnf = ctl->extra1;
                                struct in_device *idev =
                                        container_of(cnf, struct in_device, cnf);
                                if (*valp)
                                        dev_disable_lro(idev->dev);
                                inet_netconf_notify_devconf(net,
                                                            NETCONFA_FORWARDING,
                                                            idev->dev->ifindex,
                                                            cnf);
                        }
                        rtnl_unlock();
                        rt_cache_flush(net);
                } else
                        inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
                                                    NETCONFA_IFINDEX_DEFAULT,
                                                    net->ipv4.devconf_dflt);
        }

        return ret;
}
2125
2126 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2127                                 void __user *buffer,
2128                                 size_t *lenp, loff_t *ppos)
2129 {
2130         int *valp = ctl->data;
2131         int val = *valp;
2132         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2133         struct net *net = ctl->extra2;
2134
2135         if (write && *valp != val)
2136                 rt_cache_flush(net);
2137
2138         return ret;
2139 }
2140
/*
 * Build one ctl_table entry for devconf attribute @attr.
 *
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf template and .extra1 at the template itself;
 * __devinet_sysctl_register() rebases both onto the real configuration
 * block when it duplicates the table.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2163
/*
 * Template for the per-configuration sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this structure for each
 * configuration block it registers and stores the resulting header in
 * sysctl_header.  devinet_vars[] has __IPV4_DEVCONF_MAX slots; slots
 * not initialised below are zero-filled.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                /* read-only (mode 0444) */
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
                DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
                                        "force_igmp_version"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv2_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
                                        "igmpv3_unsolicited_report_interval"),
                DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
                                        "ignore_routes_with_linkdown"),

                /* these flush the route cache when their value changes */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
        },
};
2210
2211 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2212                                         struct ipv4_devconf *p)
2213 {
2214         int i;
2215         struct devinet_sysctl_table *t;
2216         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2217
2218         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2219         if (!t)
2220                 goto out;
2221
2222         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2223                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2224                 t->devinet_vars[i].extra1 = p;
2225                 t->devinet_vars[i].extra2 = net;
2226         }
2227
2228         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2229
2230         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2231         if (!t->sysctl_header)
2232                 goto free;
2233
2234         p->sysctl = t;
2235         return 0;
2236
2237 free:
2238         kfree(t);
2239 out:
2240         return -ENOBUFS;
2241 }
2242
2243 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2244 {
2245         struct devinet_sysctl_table *t = cnf->sysctl;
2246
2247         if (!t)
2248                 return;
2249
2250         cnf->sysctl = NULL;
2251         unregister_net_sysctl_table(t->sysctl_header);
2252         kfree(t);
2253 }
2254
2255 static int devinet_sysctl_register(struct in_device *idev)
2256 {
2257         int err;
2258
2259         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2260                 return -EINVAL;
2261
2262         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2263         if (err)
2264                 return err;
2265         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2266                                         &idev->cnf);
2267         if (err)
2268                 neigh_sysctl_unregister(idev->arp_parms);
2269         return err;
2270 }
2271
2272 static void devinet_sysctl_unregister(struct in_device *idev)
2273 {
2274         __devinet_sysctl_unregister(&idev->cnf);
2275         neigh_sysctl_unregister(idev->arp_parms);
2276 }
2277
/*
 * sysctl table for "ip_forward", registered under "net/ipv4" by
 * devinet_init_net().  It points at the FORWARDING slot of the global
 * ipv4_devconf and at init_net; devinet_init_net() duplicates and
 * retargets it for other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2291 #endif
2292
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt blocks and
 * the static ctl_forward_entry table directly.  Every other namespace
 * gets kmemdup()ed copies, with the ip_forward entry retargeted at the
 * copy of the "all" block.  With CONFIG_SYSCTL the "all" and "default"
 * directories and the ip_forward entry are then registered.
 *
 * Returns 0 on success, -ENOMEM on allocation or ip_forward registration
 * failure, or the error from __devinet_sysctl_register().
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (!all)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (!dflt)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (!tbl)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (!forw_hdr)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /*
         * Error unwinding, in reverse order of setup.  Only objects that
         * differ from the static defaults were allocated here and are freed.
         */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2364
2365 static __net_exit void devinet_exit_net(struct net *net)
2366 {
2367 #ifdef CONFIG_SYSCTL
2368         struct ctl_table *tbl;
2369
2370         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2371         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2372         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2373         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2374         kfree(tbl);
2375 #endif
2376         kfree(net->ipv4.devconf_dflt);
2377         kfree(net->ipv4.devconf_all);
2378 }
2379
/*
 * Per-namespace init/exit hooks, registered by devinet_init() through
 * register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2384
/*
 * AF_INET link address-family operations, registered by devinet_init()
 * through rtnl_af_register(): sizing, filling, validating and applying
 * the per-device IPv4 attributes.
 */
static struct rtnl_af_ops inet_af_ops __read_mostly = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2392
/*
 * Boot-time initialisation of the IPv4 device layer: set up the address
 * hash heads, register the per-namespace operations, the gifconf
 * handler, the netdevice notifier, the address-family ops and the
 * rtnetlink message handlers, and queue the first address lifetime
 * check.
 */
void __init devinet_init(void)
{
        int i;

        for (i = 0; i < IN4_ADDR_HSIZE; i++)
                INIT_HLIST_HEAD(&inet_addr_lst[i]);

        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        /* Run check_lifetime_work immediately (delay 0). */
        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);

        rtnl_af_register(&inet_af_ops);

        /* RTM_GETADDR has only a dump handler; RTM_GETNETCONF has both. */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
                      inet_netconf_dump_devconf, NULL);
}