GNU Linux-libre 4.19.314-gnu1
[releases.git] / net / bridge / br_if.c
1 /*
2  *      Userspace interface
3  *      Linux ethernet bridge
4  *
5  *      Authors:
6  *      Lennert Buytenhek               <buytenh@gnu.org>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 #include <linux/kernel.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/netpoll.h>
18 #include <linux/ethtool.h>
19 #include <linux/if_arp.h>
20 #include <linux/module.h>
21 #include <linux/init.h>
22 #include <linux/rtnetlink.h>
23 #include <linux/if_ether.h>
24 #include <linux/slab.h>
25 #include <net/dsa.h>
26 #include <net/sock.h>
27 #include <linux/if_vlan.h>
28 #include <net/switchdev.h>
29 #include <net/net_namespace.h>
30
31 #include "br_private.h"
32
33 /*
34  * Determine initial path cost based on speed.
35  * using recommendations from 802.1d standard
36  *
37  * Since driver might sleep need to not be holding any locks.
38  */
39 static int port_cost(struct net_device *dev)
40 {
41         struct ethtool_link_ksettings ecmd;
42
43         if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
44                 switch (ecmd.base.speed) {
45                 case SPEED_10000:
46                         return 2;
47                 case SPEED_1000:
48                         return 4;
49                 case SPEED_100:
50                         return 19;
51                 case SPEED_10:
52                         return 100;
53                 }
54         }
55
56         /* Old silly heuristics based on name */
57         if (!strncmp(dev->name, "lec", 3))
58                 return 7;
59
60         if (!strncmp(dev->name, "plip", 4))
61                 return 2500;
62
63         return 100;     /* assume old 10Mbps */
64 }
65
66
67 /* Check for port carrier transitions. */
68 void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
69 {
70         struct net_device *dev = p->dev;
71         struct net_bridge *br = p->br;
72
73         if (!(p->flags & BR_ADMIN_COST) &&
74             netif_running(dev) && netif_oper_up(dev))
75                 p->path_cost = port_cost(dev);
76
77         *notified = false;
78         if (!netif_running(br->dev))
79                 return;
80
81         spin_lock_bh(&br->lock);
82         if (netif_running(dev) && netif_oper_up(dev)) {
83                 if (p->state == BR_STATE_DISABLED) {
84                         br_stp_enable_port(p);
85                         *notified = true;
86                 }
87         } else {
88                 if (p->state != BR_STATE_DISABLED) {
89                         br_stp_disable_port(p);
90                         *notified = true;
91                 }
92         }
93         spin_unlock_bh(&br->lock);
94 }
95
96 static void br_port_set_promisc(struct net_bridge_port *p)
97 {
98         int err = 0;
99
100         if (br_promisc_port(p))
101                 return;
102
103         err = dev_set_promiscuity(p->dev, 1);
104         if (err)
105                 return;
106
107         br_fdb_unsync_static(p->br, p);
108         p->flags |= BR_PROMISC;
109 }
110
111 static void br_port_clear_promisc(struct net_bridge_port *p)
112 {
113         int err;
114
115         /* Check if the port is already non-promisc or if it doesn't
116          * support UNICAST filtering.  Without unicast filtering support
117          * we'll end up re-enabling promisc mode anyway, so just check for
118          * it here.
119          */
120         if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
121                 return;
122
123         /* Since we'll be clearing the promisc mode, program the port
124          * first so that we don't have interruption in traffic.
125          */
126         err = br_fdb_sync_static(p->br, p);
127         if (err)
128                 return;
129
130         dev_set_promiscuity(p->dev, -1);
131         p->flags &= ~BR_PROMISC;
132 }
133
134 /* When a port is added or removed or when certain port flags
135  * change, this function is called to automatically manage
136  * promiscuity setting of all the bridge ports.  We are always called
137  * under RTNL so can skip using rcu primitives.
138  */
139 void br_manage_promisc(struct net_bridge *br)
140 {
141         struct net_bridge_port *p;
142         bool set_all = false;
143
144         /* If vlan filtering is disabled or bridge interface is placed
145          * into promiscuous mode, place all ports in promiscuous mode.
146          */
147         if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
148                 set_all = true;
149
150         list_for_each_entry(p, &br->port_list, list) {
151                 if (set_all) {
152                         br_port_set_promisc(p);
153                 } else {
154                         /* If the number of auto-ports is <= 1, then all other
155                          * ports will have their output configuration
156                          * statically specified through fdbs.  Since ingress
157                          * on the auto-port becomes forwarding/egress to other
158                          * ports and egress configuration is statically known,
159                          * we can say that ingress configuration of the
160                          * auto-port is also statically known.
161                          * This lets us disable promiscuous mode and write
162                          * this config to hw.
163                          */
164                         if ((p->dev->priv_flags & IFF_UNICAST_FLT) &&
165                             (br->auto_cnt == 0 ||
166                              (br->auto_cnt == 1 && br_auto_port(p))))
167                                 br_port_clear_promisc(p);
168                         else
169                                 br_port_set_promisc(p);
170                 }
171         }
172 }
173
174 int nbp_backup_change(struct net_bridge_port *p,
175                       struct net_device *backup_dev)
176 {
177         struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port);
178         struct net_bridge_port *backup_p = NULL;
179
180         ASSERT_RTNL();
181
182         if (backup_dev) {
183                 if (!br_port_exists(backup_dev))
184                         return -ENOENT;
185
186                 backup_p = br_port_get_rtnl(backup_dev);
187                 if (backup_p->br != p->br)
188                         return -EINVAL;
189         }
190
191         if (p == backup_p)
192                 return -EINVAL;
193
194         if (old_backup == backup_p)
195                 return 0;
196
197         /* if the backup link is already set, clear it */
198         if (old_backup)
199                 old_backup->backup_redirected_cnt--;
200
201         if (backup_p)
202                 backup_p->backup_redirected_cnt++;
203         rcu_assign_pointer(p->backup_port, backup_p);
204
205         return 0;
206 }
207
208 static void nbp_backup_clear(struct net_bridge_port *p)
209 {
210         nbp_backup_change(p, NULL);
211         if (p->backup_redirected_cnt) {
212                 struct net_bridge_port *cur_p;
213
214                 list_for_each_entry(cur_p, &p->br->port_list, list) {
215                         struct net_bridge_port *backup_p;
216
217                         backup_p = rtnl_dereference(cur_p->backup_port);
218                         if (backup_p == p)
219                                 nbp_backup_change(cur_p, NULL);
220                 }
221         }
222
223         WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt);
224 }
225
226 static void nbp_update_port_count(struct net_bridge *br)
227 {
228         struct net_bridge_port *p;
229         u32 cnt = 0;
230
231         list_for_each_entry(p, &br->port_list, list) {
232                 if (br_auto_port(p))
233                         cnt++;
234         }
235         if (br->auto_cnt != cnt) {
236                 br->auto_cnt = cnt;
237                 br_manage_promisc(br);
238         }
239 }
240
241 static void nbp_delete_promisc(struct net_bridge_port *p)
242 {
243         /* If port is currently promiscuous, unset promiscuity.
244          * Otherwise, it is a static port so remove all addresses
245          * from it.
246          */
247         dev_set_allmulti(p->dev, -1);
248         if (br_promisc_port(p))
249                 dev_set_promiscuity(p->dev, -1);
250         else
251                 br_fdb_unsync_static(p->br, p);
252 }
253
254 static void release_nbp(struct kobject *kobj)
255 {
256         struct net_bridge_port *p
257                 = container_of(kobj, struct net_bridge_port, kobj);
258         kfree(p);
259 }
260
261 static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
262 {
263         struct net_bridge_port *p = kobj_to_brport(kobj);
264
265         net_ns_get_ownership(dev_net(p->dev), uid, gid);
266 }
267
268 static struct kobj_type brport_ktype = {
269 #ifdef CONFIG_SYSFS
270         .sysfs_ops = &brport_sysfs_ops,
271 #endif
272         .release = release_nbp,
273         .get_ownership = brport_get_ownership,
274 };
275
276 static void destroy_nbp(struct net_bridge_port *p)
277 {
278         struct net_device *dev = p->dev;
279
280         p->br = NULL;
281         p->dev = NULL;
282         dev_put(dev);
283
284         kobject_put(&p->kobj);
285 }
286
287 static void destroy_nbp_rcu(struct rcu_head *head)
288 {
289         struct net_bridge_port *p =
290                         container_of(head, struct net_bridge_port, rcu);
291         destroy_nbp(p);
292 }
293
294 static unsigned get_max_headroom(struct net_bridge *br)
295 {
296         unsigned max_headroom = 0;
297         struct net_bridge_port *p;
298
299         list_for_each_entry(p, &br->port_list, list) {
300                 unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
301
302                 if (dev_headroom > max_headroom)
303                         max_headroom = dev_headroom;
304         }
305
306         return max_headroom;
307 }
308
309 static void update_headroom(struct net_bridge *br, int new_hr)
310 {
311         struct net_bridge_port *p;
312
313         list_for_each_entry(p, &br->port_list, list)
314                 netdev_set_rx_headroom(p->dev, new_hr);
315
316         br->dev->needed_headroom = new_hr;
317 }
318
319 /* Delete port(interface) from bridge is done in two steps.
320  * via RCU. First step, marks device as down. That deletes
321  * all the timers and stops new packets from flowing through.
322  *
323  * Final cleanup doesn't occur until after all CPU's finished
324  * processing packets.
325  *
326  * Protected from multiple admin operations by RTNL mutex
327  */
328 static void del_nbp(struct net_bridge_port *p)
329 {
330         struct net_bridge *br = p->br;
331         struct net_device *dev = p->dev;
332
333         sysfs_remove_link(br->ifobj, p->dev->name);
334
335         nbp_delete_promisc(p);
336
337         spin_lock_bh(&br->lock);
338         br_stp_disable_port(p);
339         spin_unlock_bh(&br->lock);
340
341         br_ifinfo_notify(RTM_DELLINK, NULL, p);
342
343         list_del_rcu(&p->list);
344         if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
345                 update_headroom(br, get_max_headroom(br));
346         netdev_reset_rx_headroom(dev);
347
348         nbp_vlan_flush(p);
349         br_fdb_delete_by_port(br, p, 0, 1);
350         switchdev_deferred_process();
351         nbp_backup_clear(p);
352
353         nbp_update_port_count(br);
354
355         netdev_upper_dev_unlink(dev, br->dev);
356
357         dev->priv_flags &= ~IFF_BRIDGE_PORT;
358
359         netdev_rx_handler_unregister(dev);
360
361         br_multicast_del_port(p);
362
363         kobject_uevent(&p->kobj, KOBJ_REMOVE);
364         kobject_del(&p->kobj);
365
366         br_netpoll_disable(p);
367
368         call_rcu(&p->rcu, destroy_nbp_rcu);
369 }
370
371 /* Delete bridge device */
372 void br_dev_delete(struct net_device *dev, struct list_head *head)
373 {
374         struct net_bridge *br = netdev_priv(dev);
375         struct net_bridge_port *p, *n;
376
377         list_for_each_entry_safe(p, n, &br->port_list, list) {
378                 del_nbp(p);
379         }
380
381         br_recalculate_neigh_suppress_enabled(br);
382
383         br_fdb_delete_by_port(br, NULL, 0, 1);
384
385         cancel_delayed_work_sync(&br->gc_work);
386
387         br_sysfs_delbr(br->dev);
388         unregister_netdevice_queue(br->dev, head);
389 }
390
391 /* find an available port number */
392 static int find_portno(struct net_bridge *br)
393 {
394         int index;
395         struct net_bridge_port *p;
396         unsigned long *inuse;
397
398         inuse = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
399                         GFP_KERNEL);
400         if (!inuse)
401                 return -ENOMEM;
402
403         set_bit(0, inuse);      /* zero is reserved */
404         list_for_each_entry(p, &br->port_list, list) {
405                 set_bit(p->port_no, inuse);
406         }
407         index = find_first_zero_bit(inuse, BR_MAX_PORTS);
408         kfree(inuse);
409
410         return (index >= BR_MAX_PORTS) ? -EXFULL : index;
411 }
412
413 /* called with RTNL but without bridge lock */
414 static struct net_bridge_port *new_nbp(struct net_bridge *br,
415                                        struct net_device *dev)
416 {
417         struct net_bridge_port *p;
418         int index, err;
419
420         index = find_portno(br);
421         if (index < 0)
422                 return ERR_PTR(index);
423
424         p = kzalloc(sizeof(*p), GFP_KERNEL);
425         if (p == NULL)
426                 return ERR_PTR(-ENOMEM);
427
428         p->br = br;
429         dev_hold(dev);
430         p->dev = dev;
431         p->path_cost = port_cost(dev);
432         p->priority = 0x8000 >> BR_PORT_BITS;
433         p->port_no = index;
434         p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
435         br_init_port(p);
436         br_set_state(p, BR_STATE_DISABLED);
437         br_stp_port_timer_init(p);
438         err = br_multicast_add_port(p);
439         if (err) {
440                 dev_put(dev);
441                 kfree(p);
442                 p = ERR_PTR(err);
443         }
444
445         return p;
446 }
447
448 int br_add_bridge(struct net *net, const char *name)
449 {
450         struct net_device *dev;
451         int res;
452
453         dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
454                            br_dev_setup);
455
456         if (!dev)
457                 return -ENOMEM;
458
459         dev_net_set(dev, net);
460         dev->rtnl_link_ops = &br_link_ops;
461
462         res = register_netdev(dev);
463         if (res)
464                 free_netdev(dev);
465         return res;
466 }
467
468 int br_del_bridge(struct net *net, const char *name)
469 {
470         struct net_device *dev;
471         int ret = 0;
472
473         rtnl_lock();
474         dev = __dev_get_by_name(net, name);
475         if (dev == NULL)
476                 ret =  -ENXIO;  /* Could not find device */
477
478         else if (!(dev->priv_flags & IFF_EBRIDGE)) {
479                 /* Attempt to delete non bridge device! */
480                 ret = -EPERM;
481         }
482
483         else if (dev->flags & IFF_UP) {
484                 /* Not shutdown yet. */
485                 ret = -EBUSY;
486         }
487
488         else
489                 br_dev_delete(dev, NULL);
490
491         rtnl_unlock();
492         return ret;
493 }
494
495 /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
496 static int br_mtu_min(const struct net_bridge *br)
497 {
498         const struct net_bridge_port *p;
499         int ret_mtu = 0;
500
501         list_for_each_entry(p, &br->port_list, list)
502                 if (!ret_mtu || ret_mtu > p->dev->mtu)
503                         ret_mtu = p->dev->mtu;
504
505         return ret_mtu ? ret_mtu : ETH_DATA_LEN;
506 }
507
508 void br_mtu_auto_adjust(struct net_bridge *br)
509 {
510         ASSERT_RTNL();
511
512         /* if the bridge MTU was manually configured don't mess with it */
513         if (br->mtu_set_by_user)
514                 return;
515
516         /* change to the minimum MTU and clear the flag which was set by
517          * the bridge ndo_change_mtu callback
518          */
519         dev_set_mtu(br->dev, br_mtu_min(br));
520         br->mtu_set_by_user = false;
521 }
522
523 static void br_set_gso_limits(struct net_bridge *br)
524 {
525         unsigned int gso_max_size = GSO_MAX_SIZE;
526         u16 gso_max_segs = GSO_MAX_SEGS;
527         const struct net_bridge_port *p;
528
529         list_for_each_entry(p, &br->port_list, list) {
530                 gso_max_size = min(gso_max_size, p->dev->gso_max_size);
531                 gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
532         }
533         br->dev->gso_max_size = gso_max_size;
534         br->dev->gso_max_segs = gso_max_segs;
535 }
536
537 /*
538  * Recomputes features using slave's features
539  */
540 netdev_features_t br_features_recompute(struct net_bridge *br,
541         netdev_features_t features)
542 {
543         struct net_bridge_port *p;
544         netdev_features_t mask;
545
546         if (list_empty(&br->port_list))
547                 return features;
548
549         mask = features;
550         features &= ~NETIF_F_ONE_FOR_ALL;
551
552         list_for_each_entry(p, &br->port_list, list) {
553                 features = netdev_increment_features(features,
554                                                      p->dev->features, mask);
555         }
556         features = netdev_add_tso_features(features, mask);
557
558         return features;
559 }
560
561 /* called with RTNL */
562 int br_add_if(struct net_bridge *br, struct net_device *dev,
563               struct netlink_ext_ack *extack)
564 {
565         struct net_bridge_port *p;
566         int err = 0;
567         unsigned br_hr, dev_hr;
568         bool changed_addr, fdb_synced = false;
569
570         /* Don't allow bridging non-ethernet like devices, or DSA-enabled
571          * master network devices since the bridge layer rx_handler prevents
572          * the DSA fake ethertype handler to be invoked, so we do not strip off
573          * the DSA switch tag protocol header and the bridge layer just return
574          * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
575          */
576         if ((dev->flags & IFF_LOOPBACK) ||
577             dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
578             !is_valid_ether_addr(dev->dev_addr) ||
579             netdev_uses_dsa(dev))
580                 return -EINVAL;
581
582         /* No bridging of bridges */
583         if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
584                 NL_SET_ERR_MSG(extack,
585                                "Can not enslave a bridge to a bridge");
586                 return -ELOOP;
587         }
588
589         /* Device has master upper dev */
590         if (netdev_master_upper_dev_get(dev))
591                 return -EBUSY;
592
593         /* No bridging devices that dislike that (e.g. wireless) */
594         if (dev->priv_flags & IFF_DONT_BRIDGE) {
595                 NL_SET_ERR_MSG(extack,
596                                "Device does not allow enslaving to a bridge");
597                 return -EOPNOTSUPP;
598         }
599
600         p = new_nbp(br, dev);
601         if (IS_ERR(p))
602                 return PTR_ERR(p);
603
604         call_netdevice_notifiers(NETDEV_JOIN, dev);
605
606         err = dev_set_allmulti(dev, 1);
607         if (err) {
608                 br_multicast_del_port(p);
609                 kfree(p);       /* kobject not yet init'd, manually free */
610                 goto err1;
611         }
612
613         err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
614                                    SYSFS_BRIDGE_PORT_ATTR);
615         if (err)
616                 goto err2;
617
618         err = br_sysfs_addif(p);
619         if (err)
620                 goto err2;
621
622         err = br_netpoll_enable(p);
623         if (err)
624                 goto err3;
625
626         err = netdev_rx_handler_register(dev, br_handle_frame, p);
627         if (err)
628                 goto err4;
629
630         dev->priv_flags |= IFF_BRIDGE_PORT;
631
632         err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
633         if (err)
634                 goto err5;
635
636         err = nbp_switchdev_mark_set(p);
637         if (err)
638                 goto err6;
639
640         dev_disable_lro(dev);
641
642         list_add_rcu(&p->list, &br->port_list);
643
644         nbp_update_port_count(br);
645         if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) {
646                 /* When updating the port count we also update all ports'
647                  * promiscuous mode.
648                  * A port leaving promiscuous mode normally gets the bridge's
649                  * fdb synced to the unicast filter (if supported), however,
650                  * `br_port_clear_promisc` does not distinguish between
651                  * non-promiscuous ports and *new* ports, so we need to
652                  * sync explicitly here.
653                  */
654                 fdb_synced = br_fdb_sync_static(br, p) == 0;
655                 if (!fdb_synced)
656                         netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n");
657         }
658
659         netdev_update_features(br->dev);
660
661         br_hr = br->dev->needed_headroom;
662         dev_hr = netdev_get_fwd_headroom(dev);
663         if (br_hr < dev_hr)
664                 update_headroom(br, dev_hr);
665         else
666                 netdev_set_rx_headroom(dev, br_hr);
667
668         if (br_fdb_insert(br, p, dev->dev_addr, 0))
669                 netdev_err(dev, "failed insert local address bridge forwarding table\n");
670
671         err = nbp_vlan_init(p);
672         if (err) {
673                 netdev_err(dev, "failed to initialize vlan filtering on this port\n");
674                 goto err7;
675         }
676
677         spin_lock_bh(&br->lock);
678         changed_addr = br_stp_recalculate_bridge_id(br);
679
680         if (netif_running(dev) && netif_oper_up(dev) &&
681             (br->dev->flags & IFF_UP))
682                 br_stp_enable_port(p);
683         spin_unlock_bh(&br->lock);
684
685         br_ifinfo_notify(RTM_NEWLINK, NULL, p);
686
687         if (changed_addr)
688                 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
689
690         br_mtu_auto_adjust(br);
691         br_set_gso_limits(br);
692
693         kobject_uevent(&p->kobj, KOBJ_ADD);
694
695         return 0;
696
697 err7:
698         if (fdb_synced)
699                 br_fdb_unsync_static(br, p);
700         list_del_rcu(&p->list);
701         br_fdb_delete_by_port(br, p, 0, 1);
702         nbp_update_port_count(br);
703 err6:
704         netdev_upper_dev_unlink(dev, br->dev);
705 err5:
706         dev->priv_flags &= ~IFF_BRIDGE_PORT;
707         netdev_rx_handler_unregister(dev);
708 err4:
709         br_netpoll_disable(p);
710 err3:
711         sysfs_remove_link(br->ifobj, p->dev->name);
712 err2:
713         br_multicast_del_port(p);
714         kobject_put(&p->kobj);
715         dev_set_allmulti(dev, -1);
716 err1:
717         dev_put(dev);
718         return err;
719 }
720
721 /* called with RTNL */
722 int br_del_if(struct net_bridge *br, struct net_device *dev)
723 {
724         struct net_bridge_port *p;
725         bool changed_addr;
726
727         p = br_port_get_rtnl(dev);
728         if (!p || p->br != br)
729                 return -EINVAL;
730
731         /* Since more than one interface can be attached to a bridge,
732          * there still maybe an alternate path for netconsole to use;
733          * therefore there is no reason for a NETDEV_RELEASE event.
734          */
735         del_nbp(p);
736
737         br_mtu_auto_adjust(br);
738         br_set_gso_limits(br);
739
740         spin_lock_bh(&br->lock);
741         changed_addr = br_stp_recalculate_bridge_id(br);
742         spin_unlock_bh(&br->lock);
743
744         if (changed_addr)
745                 call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
746
747         netdev_update_features(br->dev);
748
749         return 0;
750 }
751
752 void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
753 {
754         struct net_bridge *br = p->br;
755
756         if (mask & BR_AUTO_MASK)
757                 nbp_update_port_count(br);
758
759         if (mask & BR_NEIGH_SUPPRESS)
760                 br_recalculate_neigh_suppress_enabled(br);
761 }