GNU Linux-libre 4.9.294-gnu1
[releases.git] / drivers / net / ipvlan / ipvlan_main.c
1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU General Public License as
5  * published by the Free Software Foundation; either version 2 of
6  * the License, or (at your option) any later version.
7  *
8  */
9
10 #include "ipvlan.h"
11
12 static u32 ipvl_nf_hook_refcnt = 0;
13
14 static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
15         {
16                 .hook     = ipvlan_nf_input,
17                 .pf       = NFPROTO_IPV4,
18                 .hooknum  = NF_INET_LOCAL_IN,
19                 .priority = INT_MAX,
20         },
21         {
22                 .hook     = ipvlan_nf_input,
23                 .pf       = NFPROTO_IPV6,
24                 .hooknum  = NF_INET_LOCAL_IN,
25                 .priority = INT_MAX,
26         },
27 };
28
29 static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
30         .l3mdev_l3_rcv = ipvlan_l3_rcv,
31 };
32
33 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
34 {
35         ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
36 }
37
38 static int ipvlan_register_nf_hook(void)
39 {
40         int err = 0;
41
42         if (!ipvl_nf_hook_refcnt) {
43                 err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
44                 if (!err)
45                         ipvl_nf_hook_refcnt = 1;
46         } else {
47                 ipvl_nf_hook_refcnt++;
48         }
49
50         return err;
51 }
52
53 static void ipvlan_unregister_nf_hook(void)
54 {
55         WARN_ON(!ipvl_nf_hook_refcnt);
56
57         ipvl_nf_hook_refcnt--;
58         if (!ipvl_nf_hook_refcnt)
59                 _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
60 }
61
62 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
63 {
64         struct ipvl_dev *ipvlan;
65         struct net_device *mdev = port->dev;
66         unsigned int flags;
67         int err;
68
69         ASSERT_RTNL();
70         if (port->mode != nval) {
71                 list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
72                         flags = ipvlan->dev->flags;
73                         if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
74                                 err = dev_change_flags(ipvlan->dev,
75                                                        flags | IFF_NOARP);
76                         } else {
77                                 err = dev_change_flags(ipvlan->dev,
78                                                        flags & ~IFF_NOARP);
79                         }
80                         if (unlikely(err))
81                                 goto fail;
82                 }
83                 if (nval == IPVLAN_MODE_L3S) {
84                         /* New mode is L3S */
85                         err = ipvlan_register_nf_hook();
86                         if (!err) {
87                                 mdev->l3mdev_ops = &ipvl_l3mdev_ops;
88                                 mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
89                         } else
90                                 goto fail;
91                 } else if (port->mode == IPVLAN_MODE_L3S) {
92                         /* Old mode was L3S */
93                         mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
94                         ipvlan_unregister_nf_hook();
95                         mdev->l3mdev_ops = NULL;
96                 }
97                 port->mode = nval;
98         }
99         return 0;
100
101 fail:
102         /* Undo the flags changes that have been done so far. */
103         list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
104                 flags = ipvlan->dev->flags;
105                 if (port->mode == IPVLAN_MODE_L3 ||
106                     port->mode == IPVLAN_MODE_L3S)
107                         dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
108                 else
109                         dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
110         }
111
112         return err;
113 }
114
115 static int ipvlan_port_create(struct net_device *dev)
116 {
117         struct ipvl_port *port;
118         int err, idx;
119
120         if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) {
121                 netdev_err(dev, "Master is either lo or non-ether device\n");
122                 return -EINVAL;
123         }
124
125         if (netif_is_macvlan_port(dev)) {
126                 netdev_err(dev, "Master is a macvlan port.\n");
127                 return -EBUSY;
128         }
129
130         port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
131         if (!port)
132                 return -ENOMEM;
133
134         port->dev = dev;
135         port->mode = IPVLAN_MODE_L3;
136         INIT_LIST_HEAD(&port->ipvlans);
137         for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
138                 INIT_HLIST_HEAD(&port->hlhead[idx]);
139
140         skb_queue_head_init(&port->backlog);
141         INIT_WORK(&port->wq, ipvlan_process_multicast);
142
143         err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
144         if (err)
145                 goto err;
146
147         dev->priv_flags |= IFF_IPVLAN_MASTER;
148         return 0;
149
150 err:
151         kfree_rcu(port, rcu);
152         return err;
153 }
154
155 static void ipvlan_port_destroy(struct net_device *dev)
156 {
157         struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
158
159         dev->priv_flags &= ~IFF_IPVLAN_MASTER;
160         if (port->mode == IPVLAN_MODE_L3S) {
161                 dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
162                 ipvlan_unregister_nf_hook();
163                 dev->l3mdev_ops = NULL;
164         }
165         netdev_rx_handler_unregister(dev);
166         cancel_work_sync(&port->wq);
167         __skb_queue_purge(&port->backlog);
168         kfree_rcu(port, rcu);
169 }
170
/* Offload features that are always set on an ipvlan device (and on its
 * vlan_features), independent of the lower device.
 */
#define IPVLAN_ALWAYS_ON_OFLOADS					\
	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_ROBUST |		\
	 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL)

/* Everything that is forced on in dev->features of an ipvlan device. */
#define IPVLAN_ALWAYS_ON						\
	(IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED)

/* Features an ipvlan device may inherit from its lower device. */
#define IPVLAN_FEATURES							\
	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA |		\
	 NETIF_F_FRAGLIST | NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO |	\
	 NETIF_F_GSO_ROBUST | NETIF_F_TSO_ECN | NETIF_F_TSO6 |		\
	 NETIF_F_GRO | NETIF_F_RXCSUM |					\
	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)

	/* NETIF_F_GSO_ENCAP_ALL and NETIF_F_GSO_SOFTWARE are newly added. */

/* Link-state bits copied from the lower device in ipvlan_init(). */
#define IPVLAN_STATE_MASK						\
	((1 << __LINK_STATE_NOCARRIER) | (1 << __LINK_STATE_DORMANT))
188
189 static int ipvlan_init(struct net_device *dev)
190 {
191         struct ipvl_dev *ipvlan = netdev_priv(dev);
192         const struct net_device *phy_dev = ipvlan->phy_dev;
193         struct ipvl_port *port = ipvlan->port;
194
195         dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
196                      (phy_dev->state & IPVLAN_STATE_MASK);
197         dev->features = phy_dev->features & IPVLAN_FEATURES;
198         dev->features |= IPVLAN_ALWAYS_ON;
199         dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES;
200         dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS;
201         dev->gso_max_size = phy_dev->gso_max_size;
202         dev->gso_max_segs = phy_dev->gso_max_segs;
203         dev->hard_header_len = phy_dev->hard_header_len;
204
205         netdev_lockdep_set_classes(dev);
206
207         ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats);
208         if (!ipvlan->pcpu_stats)
209                 return -ENOMEM;
210
211         port->count += 1;
212
213         return 0;
214 }
215
216 static void ipvlan_uninit(struct net_device *dev)
217 {
218         struct ipvl_dev *ipvlan = netdev_priv(dev);
219         struct ipvl_port *port = ipvlan->port;
220
221         free_percpu(ipvlan->pcpu_stats);
222
223         port->count -= 1;
224         if (!port->count)
225                 ipvlan_port_destroy(port->dev);
226 }
227
228 static int ipvlan_open(struct net_device *dev)
229 {
230         struct ipvl_dev *ipvlan = netdev_priv(dev);
231         struct ipvl_addr *addr;
232
233         if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
234             ipvlan->port->mode == IPVLAN_MODE_L3S)
235                 dev->flags |= IFF_NOARP;
236         else
237                 dev->flags &= ~IFF_NOARP;
238
239         list_for_each_entry(addr, &ipvlan->addrs, anode)
240                 ipvlan_ht_addr_add(ipvlan, addr);
241
242         return 0;
243 }
244
245 static int ipvlan_stop(struct net_device *dev)
246 {
247         struct ipvl_dev *ipvlan = netdev_priv(dev);
248         struct net_device *phy_dev = ipvlan->phy_dev;
249         struct ipvl_addr *addr;
250
251         dev_uc_unsync(phy_dev, dev);
252         dev_mc_unsync(phy_dev, dev);
253
254         list_for_each_entry(addr, &ipvlan->addrs, anode)
255                 ipvlan_ht_addr_del(addr);
256
257         return 0;
258 }
259
260 static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
261                                      struct net_device *dev)
262 {
263         const struct ipvl_dev *ipvlan = netdev_priv(dev);
264         int skblen = skb->len;
265         int ret;
266
267         ret = ipvlan_queue_xmit(skb, dev);
268         if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
269                 struct ipvl_pcpu_stats *pcptr;
270
271                 pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
272
273                 u64_stats_update_begin(&pcptr->syncp);
274                 pcptr->tx_pkts++;
275                 pcptr->tx_bytes += skblen;
276                 u64_stats_update_end(&pcptr->syncp);
277         } else {
278                 this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
279         }
280         return ret;
281 }
282
283 static netdev_features_t ipvlan_fix_features(struct net_device *dev,
284                                              netdev_features_t features)
285 {
286         struct ipvl_dev *ipvlan = netdev_priv(dev);
287
288         features |= NETIF_F_ALL_FOR_ALL;
289         features &= (ipvlan->sfeatures | ~IPVLAN_FEATURES);
290         features = netdev_increment_features(ipvlan->phy_dev->features,
291                                              features, features);
292         features |= IPVLAN_ALWAYS_ON;
293         features &= (IPVLAN_FEATURES | IPVLAN_ALWAYS_ON);
294
295         return features;
296 }
297
298 static void ipvlan_change_rx_flags(struct net_device *dev, int change)
299 {
300         struct ipvl_dev *ipvlan = netdev_priv(dev);
301         struct net_device *phy_dev = ipvlan->phy_dev;
302
303         if (change & IFF_ALLMULTI)
304                 dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1);
305 }
306
307 static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
308 {
309         struct ipvl_dev *ipvlan = netdev_priv(dev);
310
311         if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
312                 bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
313         } else {
314                 struct netdev_hw_addr *ha;
315                 DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);
316
317                 bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
318                 netdev_for_each_mc_addr(ha, dev)
319                         __set_bit(ipvlan_mac_hash(ha->addr), mc_filters);
320
321                 /* Turn-on broadcast bit irrespective of address family,
322                  * since broadcast is deferred to a work-queue, hence no
323                  * impact on fast-path processing.
324                  */
325                 __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);
326
327                 bitmap_copy(ipvlan->mac_filters, mc_filters,
328                             IPVLAN_MAC_FILTER_SIZE);
329         }
330         dev_uc_sync(ipvlan->phy_dev, dev);
331         dev_mc_sync(ipvlan->phy_dev, dev);
332 }
333
334 static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev,
335                                                     struct rtnl_link_stats64 *s)
336 {
337         struct ipvl_dev *ipvlan = netdev_priv(dev);
338
339         if (ipvlan->pcpu_stats) {
340                 struct ipvl_pcpu_stats *pcptr;
341                 u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
342                 u32 rx_errs = 0, tx_drps = 0;
343                 u32 strt;
344                 int idx;
345
346                 for_each_possible_cpu(idx) {
347                         pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
348                         do {
349                                 strt= u64_stats_fetch_begin_irq(&pcptr->syncp);
350                                 rx_pkts = pcptr->rx_pkts;
351                                 rx_bytes = pcptr->rx_bytes;
352                                 rx_mcast = pcptr->rx_mcast;
353                                 tx_pkts = pcptr->tx_pkts;
354                                 tx_bytes = pcptr->tx_bytes;
355                         } while (u64_stats_fetch_retry_irq(&pcptr->syncp,
356                                                            strt));
357
358                         s->rx_packets += rx_pkts;
359                         s->rx_bytes += rx_bytes;
360                         s->multicast += rx_mcast;
361                         s->tx_packets += tx_pkts;
362                         s->tx_bytes += tx_bytes;
363
364                         /* u32 values are updated without syncp protection. */
365                         rx_errs += pcptr->rx_errs;
366                         tx_drps += pcptr->tx_drps;
367                 }
368                 s->rx_errors = rx_errs;
369                 s->rx_dropped = rx_errs;
370                 s->tx_dropped = tx_drps;
371         }
372         return s;
373 }
374
375 static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
376 {
377         struct ipvl_dev *ipvlan = netdev_priv(dev);
378         struct net_device *phy_dev = ipvlan->phy_dev;
379
380         return vlan_vid_add(phy_dev, proto, vid);
381 }
382
383 static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
384                                    u16 vid)
385 {
386         struct ipvl_dev *ipvlan = netdev_priv(dev);
387         struct net_device *phy_dev = ipvlan->phy_dev;
388
389         vlan_vid_del(phy_dev, proto, vid);
390         return 0;
391 }
392
393 static int ipvlan_get_iflink(const struct net_device *dev)
394 {
395         struct ipvl_dev *ipvlan = netdev_priv(dev);
396
397         return ipvlan->phy_dev->ifindex;
398 }
399
400 static const struct net_device_ops ipvlan_netdev_ops = {
401         .ndo_init               = ipvlan_init,
402         .ndo_uninit             = ipvlan_uninit,
403         .ndo_open               = ipvlan_open,
404         .ndo_stop               = ipvlan_stop,
405         .ndo_start_xmit         = ipvlan_start_xmit,
406         .ndo_fix_features       = ipvlan_fix_features,
407         .ndo_change_rx_flags    = ipvlan_change_rx_flags,
408         .ndo_set_rx_mode        = ipvlan_set_multicast_mac_filter,
409         .ndo_get_stats64        = ipvlan_get_stats64,
410         .ndo_vlan_rx_add_vid    = ipvlan_vlan_rx_add_vid,
411         .ndo_vlan_rx_kill_vid   = ipvlan_vlan_rx_kill_vid,
412         .ndo_get_iflink         = ipvlan_get_iflink,
413 };
414
415 static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
416                               unsigned short type, const void *daddr,
417                               const void *saddr, unsigned len)
418 {
419         const struct ipvl_dev *ipvlan = netdev_priv(dev);
420         struct net_device *phy_dev = ipvlan->phy_dev;
421
422         /* TODO Probably use a different field than dev_addr so that the
423          * mac-address on the virtual device is portable and can be carried
424          * while the packets use the mac-addr on the physical device.
425          */
426         return dev_hard_header(skb, phy_dev, type, daddr,
427                                saddr ? : dev->dev_addr, len);
428 }
429
430 static const struct header_ops ipvlan_header_ops = {
431         .create         = ipvlan_hard_header,
432         .parse          = eth_header_parse,
433         .cache          = eth_header_cache,
434         .cache_update   = eth_header_cache_update,
435 };
436
437 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
438                                              struct ethtool_link_ksettings *cmd)
439 {
440         const struct ipvl_dev *ipvlan = netdev_priv(dev);
441
442         return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
443 }
444
445 static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
446                                        struct ethtool_drvinfo *drvinfo)
447 {
448         strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
449         strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
450 }
451
452 static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
453 {
454         const struct ipvl_dev *ipvlan = netdev_priv(dev);
455
456         return ipvlan->msg_enable;
457 }
458
459 static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
460 {
461         struct ipvl_dev *ipvlan = netdev_priv(dev);
462
463         ipvlan->msg_enable = value;
464 }
465
466 static const struct ethtool_ops ipvlan_ethtool_ops = {
467         .get_link       = ethtool_op_get_link,
468         .get_link_ksettings     = ipvlan_ethtool_get_link_ksettings,
469         .get_drvinfo    = ipvlan_ethtool_get_drvinfo,
470         .get_msglevel   = ipvlan_ethtool_get_msglevel,
471         .set_msglevel   = ipvlan_ethtool_set_msglevel,
472 };
473
474 static int ipvlan_nl_changelink(struct net_device *dev,
475                                 struct nlattr *tb[], struct nlattr *data[])
476 {
477         struct ipvl_dev *ipvlan = netdev_priv(dev);
478         struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
479         int err = 0;
480
481         if (!data)
482                 return 0;
483         if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
484                 return -EPERM;
485
486         if (data[IFLA_IPVLAN_MODE]) {
487                 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
488
489                 err = ipvlan_set_port_mode(port, nmode);
490         }
491         return err;
492 }
493
494 static size_t ipvlan_nl_getsize(const struct net_device *dev)
495 {
496         return (0
497                 + nla_total_size(2) /* IFLA_IPVLAN_MODE */
498                 );
499 }
500
501 static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[])
502 {
503         if (data && data[IFLA_IPVLAN_MODE]) {
504                 u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
505
506                 if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX)
507                         return -EINVAL;
508         }
509         return 0;
510 }
511
512 static int ipvlan_nl_fillinfo(struct sk_buff *skb,
513                               const struct net_device *dev)
514 {
515         struct ipvl_dev *ipvlan = netdev_priv(dev);
516         struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
517         int ret = -EINVAL;
518
519         if (!port)
520                 goto err;
521
522         ret = -EMSGSIZE;
523         if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
524                 goto err;
525
526         return 0;
527
528 err:
529         return ret;
530 }
531
532 static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
533                            struct nlattr *tb[], struct nlattr *data[])
534 {
535         struct ipvl_dev *ipvlan = netdev_priv(dev);
536         struct ipvl_port *port;
537         struct net_device *phy_dev;
538         int err;
539         u16 mode = IPVLAN_MODE_L3;
540         bool create = false;
541
542         if (!tb[IFLA_LINK])
543                 return -EINVAL;
544
545         phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
546         if (!phy_dev)
547                 return -ENODEV;
548
549         if (netif_is_ipvlan(phy_dev)) {
550                 struct ipvl_dev *tmp = netdev_priv(phy_dev);
551
552                 phy_dev = tmp->phy_dev;
553                 if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
554                         return -EPERM;
555         } else if (!netif_is_ipvlan_port(phy_dev)) {
556                 err = ipvlan_port_create(phy_dev);
557                 if (err < 0)
558                         return err;
559                 create = true;
560         }
561
562         if (data && data[IFLA_IPVLAN_MODE])
563                 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
564
565         port = ipvlan_port_get_rtnl(phy_dev);
566         ipvlan->phy_dev = phy_dev;
567         ipvlan->dev = dev;
568         ipvlan->port = port;
569         ipvlan->sfeatures = IPVLAN_FEATURES;
570         if (!tb[IFLA_MTU])
571                 ipvlan_adjust_mtu(ipvlan, phy_dev);
572         INIT_LIST_HEAD(&ipvlan->addrs);
573
574         /* TODO Probably put random address here to be presented to the
575          * world but keep using the physical-dev address for the outgoing
576          * packets.
577          */
578         memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);
579
580         dev->priv_flags |= IFF_IPVLAN_SLAVE;
581
582         err = register_netdevice(dev);
583         if (err < 0)
584                 goto destroy_ipvlan_port;
585
586         err = netdev_upper_dev_link(phy_dev, dev);
587         if (err) {
588                 goto unregister_netdev;
589         }
590         err = ipvlan_set_port_mode(port, mode);
591         if (err) {
592                 goto unlink_netdev;
593         }
594
595         list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
596         netif_stacked_transfer_operstate(phy_dev, dev);
597         return 0;
598
599 unlink_netdev:
600         netdev_upper_dev_unlink(phy_dev, dev);
601 unregister_netdev:
602         unregister_netdevice(dev);
603 destroy_ipvlan_port:
604         if (create)
605                 ipvlan_port_destroy(phy_dev);
606         return err;
607 }
608
609 static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
610 {
611         struct ipvl_dev *ipvlan = netdev_priv(dev);
612         struct ipvl_addr *addr, *next;
613
614         list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
615                 ipvlan_ht_addr_del(addr);
616                 list_del(&addr->anode);
617                 kfree_rcu(addr, rcu);
618         }
619
620         list_del_rcu(&ipvlan->pnode);
621         unregister_netdevice_queue(dev, head);
622         netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
623 }
624
625 static void ipvlan_link_setup(struct net_device *dev)
626 {
627         ether_setup(dev);
628
629         dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
630         dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
631         dev->netdev_ops = &ipvlan_netdev_ops;
632         dev->destructor = free_netdev;
633         dev->header_ops = &ipvlan_header_ops;
634         dev->ethtool_ops = &ipvlan_ethtool_ops;
635 }
636
637 static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
638 {
639         [IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
640 };
641
642 static struct rtnl_link_ops ipvlan_link_ops = {
643         .kind           = "ipvlan",
644         .priv_size      = sizeof(struct ipvl_dev),
645
646         .get_size       = ipvlan_nl_getsize,
647         .policy         = ipvlan_nl_policy,
648         .validate       = ipvlan_nl_validate,
649         .fill_info      = ipvlan_nl_fillinfo,
650         .changelink     = ipvlan_nl_changelink,
651         .maxtype        = IFLA_IPVLAN_MAX,
652
653         .setup          = ipvlan_link_setup,
654         .newlink        = ipvlan_link_new,
655         .dellink        = ipvlan_link_delete,
656 };
657
/* Register @ops with rtnetlink; returns the result of rtnl_link_register(). */
static int ipvlan_link_register(struct rtnl_link_ops *ops)
{
	int ret = rtnl_link_register(ops);

	return ret;
}
662
663 static int ipvlan_device_event(struct notifier_block *unused,
664                                unsigned long event, void *ptr)
665 {
666         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
667         struct ipvl_dev *ipvlan, *next;
668         struct ipvl_port *port;
669         LIST_HEAD(lst_kill);
670
671         if (!netif_is_ipvlan_port(dev))
672                 return NOTIFY_DONE;
673
674         port = ipvlan_port_get_rtnl(dev);
675
676         switch (event) {
677         case NETDEV_CHANGE:
678                 list_for_each_entry(ipvlan, &port->ipvlans, pnode)
679                         netif_stacked_transfer_operstate(ipvlan->phy_dev,
680                                                          ipvlan->dev);
681                 break;
682
683         case NETDEV_UNREGISTER:
684                 if (dev->reg_state != NETREG_UNREGISTERING)
685                         break;
686
687                 list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
688                                          pnode)
689                         ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
690                                                             &lst_kill);
691                 unregister_netdevice_many(&lst_kill);
692                 break;
693
694         case NETDEV_FEAT_CHANGE:
695                 list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
696                         ipvlan->dev->gso_max_size = dev->gso_max_size;
697                         ipvlan->dev->gso_max_segs = dev->gso_max_segs;
698                         netdev_update_features(ipvlan->dev);
699                 }
700                 break;
701
702         case NETDEV_CHANGEMTU:
703                 list_for_each_entry(ipvlan, &port->ipvlans, pnode)
704                         ipvlan_adjust_mtu(ipvlan, dev);
705                 break;
706
707         case NETDEV_PRE_TYPE_CHANGE:
708                 /* Forbid underlying device to change its type. */
709                 return NOTIFY_BAD;
710         }
711         return NOTIFY_DONE;
712 }
713
714 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
715 {
716         struct ipvl_addr *addr;
717
718         if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
719                 netif_err(ipvlan, ifup, ipvlan->dev,
720                           "Failed to add IPv6=%pI6c addr for %s intf\n",
721                           ip6_addr, ipvlan->dev->name);
722                 return -EINVAL;
723         }
724         addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
725         if (!addr)
726                 return -ENOMEM;
727
728         addr->master = ipvlan;
729         memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr));
730         addr->atype = IPVL_IPV6;
731         list_add_tail(&addr->anode, &ipvlan->addrs);
732
733         /* If the interface is not up, the address will be added to the hash
734          * list by ipvlan_open.
735          */
736         if (netif_running(ipvlan->dev))
737                 ipvlan_ht_addr_add(ipvlan, addr);
738
739         return 0;
740 }
741
742 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
743 {
744         struct ipvl_addr *addr;
745
746         addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
747         if (!addr)
748                 return;
749
750         ipvlan_ht_addr_del(addr);
751         list_del(&addr->anode);
752         kfree_rcu(addr, rcu);
753
754         return;
755 }
756
757 static int ipvlan_addr6_event(struct notifier_block *unused,
758                               unsigned long event, void *ptr)
759 {
760         struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
761         struct net_device *dev = (struct net_device *)if6->idev->dev;
762         struct ipvl_dev *ipvlan = netdev_priv(dev);
763
764         /* FIXME IPv6 autoconf calls us from bh without RTNL */
765         if (in_softirq())
766                 return NOTIFY_DONE;
767
768         if (!netif_is_ipvlan(dev))
769                 return NOTIFY_DONE;
770
771         if (!ipvlan || !ipvlan->port)
772                 return NOTIFY_DONE;
773
774         switch (event) {
775         case NETDEV_UP:
776                 if (ipvlan_add_addr6(ipvlan, &if6->addr))
777                         return NOTIFY_BAD;
778                 break;
779
780         case NETDEV_DOWN:
781                 ipvlan_del_addr6(ipvlan, &if6->addr);
782                 break;
783         }
784
785         return NOTIFY_OK;
786 }
787
788 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
789 {
790         struct ipvl_addr *addr;
791
792         if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
793                 netif_err(ipvlan, ifup, ipvlan->dev,
794                           "Failed to add IPv4=%pI4 on %s intf.\n",
795                           ip4_addr, ipvlan->dev->name);
796                 return -EINVAL;
797         }
798         addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
799         if (!addr)
800                 return -ENOMEM;
801
802         addr->master = ipvlan;
803         memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr));
804         addr->atype = IPVL_IPV4;
805         list_add_tail(&addr->anode, &ipvlan->addrs);
806
807         /* If the interface is not up, the address will be added to the hash
808          * list by ipvlan_open.
809          */
810         if (netif_running(ipvlan->dev))
811                 ipvlan_ht_addr_add(ipvlan, addr);
812
813         return 0;
814 }
815
816 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
817 {
818         struct ipvl_addr *addr;
819
820         addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
821         if (!addr)
822                 return;
823
824         ipvlan_ht_addr_del(addr);
825         list_del(&addr->anode);
826         kfree_rcu(addr, rcu);
827
828         return;
829 }
830
831 static int ipvlan_addr4_event(struct notifier_block *unused,
832                               unsigned long event, void *ptr)
833 {
834         struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
835         struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
836         struct ipvl_dev *ipvlan = netdev_priv(dev);
837         struct in_addr ip4_addr;
838
839         if (!netif_is_ipvlan(dev))
840                 return NOTIFY_DONE;
841
842         if (!ipvlan || !ipvlan->port)
843                 return NOTIFY_DONE;
844
845         switch (event) {
846         case NETDEV_UP:
847                 ip4_addr.s_addr = if4->ifa_address;
848                 if (ipvlan_add_addr4(ipvlan, &ip4_addr))
849                         return NOTIFY_BAD;
850                 break;
851
852         case NETDEV_DOWN:
853                 ip4_addr.s_addr = if4->ifa_address;
854                 ipvlan_del_addr4(ipvlan, &ip4_addr);
855                 break;
856         }
857
858         return NOTIFY_OK;
859 }
860
861 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
862         .notifier_call = ipvlan_addr4_event,
863 };
864
865 static struct notifier_block ipvlan_notifier_block __read_mostly = {
866         .notifier_call = ipvlan_device_event,
867 };
868
869 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
870         .notifier_call = ipvlan_addr6_event,
871 };
872
873 static int __init ipvlan_init_module(void)
874 {
875         int err;
876
877         ipvlan_init_secret();
878         register_netdevice_notifier(&ipvlan_notifier_block);
879         register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
880         register_inetaddr_notifier(&ipvlan_addr4_notifier_block);
881
882         err = ipvlan_link_register(&ipvlan_link_ops);
883         if (err < 0)
884                 goto error;
885
886         return 0;
887 error:
888         unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
889         unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
890         unregister_netdevice_notifier(&ipvlan_notifier_block);
891         return err;
892 }
893
894 static void __exit ipvlan_cleanup_module(void)
895 {
896         rtnl_link_unregister(&ipvlan_link_ops);
897         unregister_netdevice_notifier(&ipvlan_notifier_block);
898         unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
899         unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
900 }
901
902 module_init(ipvlan_init_module);
903 module_exit(ipvlan_cleanup_module);
904
905 MODULE_LICENSE("GPL");
906 MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
907 MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
908 MODULE_ALIAS_RTNL_LINK("ipvlan");