/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN and a
   new qdisc with the same name. To join a slave to the equalizer,
   simply attach that qdisc as the root qdisc of the slave device,
   e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

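   Attaching slaves does not configure the bundle device itself. A
   typical follow-up (a sketch assuming iproute2; the address below
   is purely illustrative) is:

   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0
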
   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the
      tbusy signal and generate EOI events. If you want to equalize
      virtual devices such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will happily equalize a 9600 baud line and 100Mb Ethernet.
      Certainly, a large difference in link speeds will make the
      resulting equalized link unusable, because of massive packet
      reordering. I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

struct teql_master {
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        unsigned long   tx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_errors;
        unsigned long   tx_dropped;
};

struct teql_sched_data {
        struct Qdisc *next;
        struct teql_master *m;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

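/* Queue the packet on this slave's private holding queue, bounded by
 * the slave device's tx_queue_len; anything beyond that is dropped.
 */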
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < dev->tx_queue_len) {
                __skb_queue_tail(&q->q, skb);
                return NET_XMIT_SUCCESS;
        }

        return qdisc_drop(skb, sch, to_free);
}

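/* Hand the next queued packet to the slave device. When this slave's
 * queue runs dry, record it as the master's current slave and wake the
 * master queue so it can be refilled; sch->q.qlen mirrors the combined
 * backlog of this queue and the master's root qdisc.
 */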
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
        struct sk_buff *skb;
        struct Qdisc *q;

        skb = __skb_dequeue(&dat->q);
        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
        q = rcu_dereference_bh(dat_queue->qdisc);

        if (skb == NULL) {
                struct net_device *m = qdisc_dev(q);
                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        } else {
                qdisc_bstats_update(sch, skb);
        }
        sch->q.qlen = dat->q.qlen + q->q.qlen;
        return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
}

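/* Unlink this qdisc from the master's circular slave list. When the
 * last slave goes away, reset the master device's root qdisc under its
 * root lock.
 */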
static void
teql_destroy(struct Qdisc *sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        if (!master)
                return;

        prev = master->slaves;
        if (prev) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                struct netdev_queue *txq;
                                                spinlock_t *root_lock;

                                                txq = netdev_get_tx_queue(master->dev, 0);
                                                master->slaves = NULL;

                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
                                                spin_lock_bh(root_lock);
                                                qdisc_reset(rtnl_dereference(txq->qdisc));
                                                spin_unlock_bh(root_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                break;
                        }

                } while ((prev = q) != master->slaves);
        }
}

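/* Called when "tc qdisc add dev ethX root teqlN" attaches a slave.
 * Validate that the slave is compatible with the bundle (header room,
 * MTU, and broadcast/PtP/multicast flags when the master is up), then
 * link the new qdisc into the master's circular slave list.
 */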
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
                           struct netlink_ext_ack *extack)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags & IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags & IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags & IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
        }
        return 0;
}

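/* Rebuild the link-layer header for the chosen slave: look up (or
 * create) the neighbour entry for the destination on that slave and
 * use it to fill in the hard header.
 */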
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
               struct net_device *dev, struct netdev_queue *txq,
               struct dst_entry *dst)
{
        struct neighbour *n;
        int err = 0;

        n = dst_neigh_lookup_skb(dst, skb);
        if (!n)
                return -ENOENT;

        if (dst->dev != dev) {
                struct neighbour *mn;

                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
                neigh_release(n);
                if (IS_ERR(mn))
                        return PTR_ERR(mn);
                n = mn;
        }

        if (neigh_event_send(n, skb_res) == 0) {
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, n, dev);
                err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
                                      haddr, NULL, skb->len);

                if (err < 0)
                        err = -EINVAL;
        } else {
                err = (skb_res == NULL) ? -EAGAIN : 1;
        }
        neigh_release(n);
        return err;
}

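/* Resolution wrapper: report -ENODEV if the master's root qdisc has
 * been replaced by noop_qdisc (device deactivated), skip resolution
 * entirely for slaves without header_ops or packets without a dst, and
 * otherwise resolve under rcu_read_lock.
 */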
static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res,
                               struct net_device *dev,
                               struct netdev_queue *txq)
{
        struct dst_entry *dst = skb_dst(skb);
        int res;

        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
                return -ENODEV;

        if (!dev->header_ops || !dst)
                return 0;

        rcu_read_lock();
        res = __teql_resolve(skb, skb_res, dev, txq, dst);
        rcu_read_unlock();

        return res;
}

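/* Transmit on the bundle: walk the circular slave list from the last
 * slave served and send on the first one that is running, resolvable
 * and not frozen. If every slave is busy, stop the master queue and
 * request a requeue (NETDEV_TX_BUSY); if no slave could resolve the
 * destination, retry once with skb_res set so neigh_event_send() may
 * park the packet on the neighbour queue, else drop it.
 */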
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        q = start;
        if (!q)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
                case 0:
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
                                    netdev_start_xmit(skb, slave, slave_txq, false) ==
                                    NETDEV_TX_OK) {
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

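/* Bring the master up: its MTU becomes the minimum slave MTU, and its
 * broadcast/PtP/multicast flags are the intersection of the slaves'
 * flags (the bundle is only broadcast if every slave is).
 */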
static int teql_master_open(struct net_device *dev)
{
        struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned int flags = IFF_NOARP | IFF_MULTICAST;

        if (m->slaves == NULL)
                return -EUNATCH;

        flags = FMASK;

        q = m->slaves;
        do {
                struct net_device *slave = qdisc_dev(q);

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags & IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags & IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags & IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags & ~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static void teql_master_stats64(struct net_device *dev,
                                struct rtnl_link_stats64 *stats)
{
        struct teql_master *m = netdev_priv(dev);

        stats->tx_packets       = m->tx_packets;
        stats->tx_bytes         = m->tx_bytes;
        stats->tx_errors        = m->tx_errors;
        stats->tx_dropped       = m->tx_dropped;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
                } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }

        dev->mtu = new_mtu;
        return 0;
}

static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu = teql_master_mtu,
};

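/* Setup callback for alloc_netdev(): point the per-master Qdisc_ops at
 * the teql handlers above and initialize the master net_device defaults.
 */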
static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    =       teql_enqueue;
        ops->dequeue    =       teql_dequeue;
        ops->peek       =       teql_peek;
        ops->init       =       teql_qdisc_init;
        ops->reset      =       teql_reset;
        ops->destroy    =       teql_destroy;
        ops->owner      =       THIS_MODULE;

        dev->netdev_ops =       &teql_netdev_ops;
        dev->type               = ARPHRD_VOID;
        dev->mtu                = 1500;
        dev->min_mtu            = 68;
        dev->max_mtu            = 65535;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER;
        netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

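/* Create max_equalizers master devices (teql0, teql1, ...), registering
 * a netdev and a matching qdisc (named after the device) for each.
 */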
static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
                                   NET_NAME_UNKNOWN, teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                err = register_netdev(dev);
                if (err) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);

                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

                list_del(&master->master_list);

                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");