GNU Linux-libre 6.5.10-gnu
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows a priority to be assigned to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent.
52 */
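/* Illustrative example (the "eth0" device name and iproute2 usage are
 * assumptions): a two-level hierarchy where 1:1 is an interior class and
 * 1:10/1:20 are leaves (level 0) that may borrow from 1:1 up to their ceil:
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 1mbit ceil 1mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 600kbit ceil 1mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 400kbit ceil 1mbit
 */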
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
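/* A minimal sketch of how these knobs are reachable at runtime (paths assume
 * the scheduler is built as the sch_htb module):
 *
 *   modprobe sch_htb htb_rate_est=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */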
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105         int                     filter_cnt;
106
107         int                     level;          /* our level (see above) */
108         unsigned int            children;
109         struct htb_class        *parent;        /* parent class */
110
111         struct net_rate_estimator __rcu *rate_est;
112
113         /*
114          * Written often fields
115          */
116         struct gnet_stats_basic_sync bstats;
117         struct gnet_stats_basic_sync bstats_bias;
118         struct tc_htb_xstats    xstats; /* our special stats */
119
120         /* token bucket parameters */
121         s64                     tokens, ctokens;/* current number of tokens */
122         s64                     t_c;            /* checkpoint time */
123
124         union {
125                 struct htb_class_leaf {
126                         int             deficit[TC_HTB_MAXDEPTH];
127                         struct Qdisc    *q;
128                         struct netdev_queue *offload_queue;
129                 } leaf;
130                 struct htb_class_inner {
131                         struct htb_prio clprio[TC_HTB_NUMPRIO];
132                 } inner;
133         };
134         s64                     pq_key;
135
136         int                     prio_activity;  /* for which prios are we active */
137         enum htb_cmode          cmode;          /* current mode of the class */
138         struct rb_node          pq_node;        /* node for event queue */
139         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
140
141         unsigned int drops ____cacheline_aligned_in_smp;
142         unsigned int            overlimits;
143 };
144
145 struct htb_level {
146         struct rb_root  wait_pq;
147         struct htb_prio hprio[TC_HTB_NUMPRIO];
148 };
149
150 struct htb_sched {
151         struct Qdisc_class_hash clhash;
152         int                     defcls;         /* class where unclassified flows go to */
153         int                     rate2quantum;   /* quant = rate / rate2quantum */
154
155         /* filters for qdisc itself */
156         struct tcf_proto __rcu  *filter_list;
157         struct tcf_block        *block;
158
159 #define HTB_WARN_TOOMANYEVENTS  0x1
160         unsigned int            warned; /* only one warning */
161         int                     direct_qlen;
162         struct work_struct      work;
163
164         /* non shaped skbs; let them go directly thru */
165         struct qdisc_skb_head   direct_queue;
166         u32                     direct_pkts;
167         u32                     overlimits;
168
169         struct qdisc_watchdog   watchdog;
170
171         s64                     now;    /* cached dequeue time */
172
173         /* time of nearest event per level (row) */
174         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
175
176         int                     row_mask[TC_HTB_MAXDEPTH];
177
178         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
179
180         struct Qdisc            **direct_qdiscs;
181         unsigned int            num_direct_qdiscs;
182
183         bool                    offload;
184 };
185
186 /* find class in global hash table using given handle */
187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
188 {
189         struct htb_sched *q = qdisc_priv(sch);
190         struct Qdisc_class_common *clc;
191
192         clc = qdisc_class_find(&q->clhash, handle);
193         if (clc == NULL)
194                 return NULL;
195         return container_of(clc, struct htb_class, common);
196 }
197
198 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
199 {
200         return (unsigned long)htb_find(handle, sch);
201 }
202
203 #define HTB_DIRECT ((struct htb_class *)-1L)
204
205 /**
206  * htb_classify - classify a packet into class
207  * @skb: the socket buffer
208  * @sch: the active queue discipline
209  * @qerr: pointer for returned status code
210  *
211  * It returns NULL if the packet should be dropped or -1 if the packet
212  * should be passed directly through. In all other cases a leaf class is
213  * returned. We allow direct class selection by classid in skb->priority.
214  * Then we examine filters in the qdisc and in inner nodes (if a higher
215  * filter points to the inner node). If we end up with classid MAJOR:0 we
216  * enqueue the skb into the special internal fifo (direct). These packets
217  * then go directly through. If we still have no valid leaf we try to use
218  * the MAJOR:default leaf; if still unsuccessful, return the direct queue.
219  */
220 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
221                                       int *qerr)
222 {
223         struct htb_sched *q = qdisc_priv(sch);
224         struct htb_class *cl;
225         struct tcf_result res;
226         struct tcf_proto *tcf;
227         int result;
228
229         /* allow the class to be selected by setting skb->priority to a valid
230          * classid; note that nfmark can be used too by attaching filter fw with no
231          * rules in it
232          */
233         if (skb->priority == sch->handle)
234                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
235         cl = htb_find(skb->priority, sch);
236         if (cl) {
237                 if (cl->level == 0)
238                         return cl;
239                 /* Start with inner filter chain if a non-leaf class is selected */
240                 tcf = rcu_dereference_bh(cl->filter_list);
241         } else {
242                 tcf = rcu_dereference_bh(q->filter_list);
243         }
244
245         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
246         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
247 #ifdef CONFIG_NET_CLS_ACT
248                 switch (result) {
249                 case TC_ACT_QUEUED:
250                 case TC_ACT_STOLEN:
251                 case TC_ACT_TRAP:
252                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
253                         fallthrough;
254                 case TC_ACT_SHOT:
255                         return NULL;
256                 }
257 #endif
258                 cl = (void *)res.class;
259                 if (!cl) {
260                         if (res.classid == sch->handle)
261                                 return HTB_DIRECT;      /* X:0 (direct flow) */
262                         cl = htb_find(res.classid, sch);
263                         if (!cl)
264                                 break;  /* filter selected invalid classid */
265                 }
266                 if (!cl->level)
267                         return cl;      /* we hit leaf; return it */
268
269                 /* we have got inner class; apply inner filter chain */
270                 tcf = rcu_dereference_bh(cl->filter_list);
271         }
272         /* classification failed; try to use default class */
273         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
274         if (!cl || cl->level)
275                 return HTB_DIRECT;      /* bad default .. this is safe bet */
276         return cl;
277 }
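/* Illustrative userspace counterpart (a root HTB qdisc 1: with a leaf class
 * 1:10 is assumed): an application can skip the filter chain entirely and
 * pick the leaf by writing its classid into the socket priority, which ends
 * up in skb->priority:
 *
 *   int classid = (1 << 16) | 0x10;                  // handle 1:10
 *   setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &classid, sizeof(classid));
 *
 * A priority equal to the qdisc handle itself (1:0) selects the direct
 * queue, as checked at the top of htb_classify() above.
 */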
278
279 /**
280  * htb_add_to_id_tree - adds class to the round robin list
281  * @root: the root of the tree
282  * @cl: the class to add
283  * @prio: the given prio in class
284  *
285  * The routine adds the class to the list (actually a tree) sorted by classid.
286  * Make sure that the class is not already on such a list for the given prio.
287  */
288 static void htb_add_to_id_tree(struct rb_root *root,
289                                struct htb_class *cl, int prio)
290 {
291         struct rb_node **p = &root->rb_node, *parent = NULL;
292
293         while (*p) {
294                 struct htb_class *c;
295                 parent = *p;
296                 c = rb_entry(parent, struct htb_class, node[prio]);
297
298                 if (cl->common.classid > c->common.classid)
299                         p = &parent->rb_right;
300                 else
301                         p = &parent->rb_left;
302         }
303         rb_link_node(&cl->node[prio], parent, p);
304         rb_insert_color(&cl->node[prio], root);
305 }
306
307 /**
308  * htb_add_to_wait_tree - adds class to the event queue with delay
309  * @q: the priority event queue
310  * @cl: the class to add
311  * @delay: delay in nanoseconds
312  *
313  * The class is added to the priority event queue to indicate that the class
314  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
315  * the class is not already in the queue.
316  */
317 static void htb_add_to_wait_tree(struct htb_sched *q,
318                                  struct htb_class *cl, s64 delay)
319 {
320         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
321
322         cl->pq_key = q->now + delay;
323         if (cl->pq_key == q->now)
324                 cl->pq_key++;
325
326         /* update the nearest event cache */
327         if (q->near_ev_cache[cl->level] > cl->pq_key)
328                 q->near_ev_cache[cl->level] = cl->pq_key;
329
330         while (*p) {
331                 struct htb_class *c;
332                 parent = *p;
333                 c = rb_entry(parent, struct htb_class, pq_node);
334                 if (cl->pq_key >= c->pq_key)
335                         p = &parent->rb_right;
336                 else
337                         p = &parent->rb_left;
338         }
339         rb_link_node(&cl->pq_node, parent, p);
340         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
341 }
342
343 /**
344  * htb_next_rb_node - finds next node in binary tree
345  * @n: the current node in binary tree
346  *
347  * When we are past the last key, *n is set to NULL.
348  * Average complexity is 2 steps per call.
349  */
350 static inline void htb_next_rb_node(struct rb_node **n)
351 {
352         *n = rb_next(*n);
353 }
354
355 /**
356  * htb_add_class_to_row - add class to its row
357  * @q: the priority event queue
358  * @cl: the class to add
359  * @mask: bitmap of the given priorities in the class
360  *
361  * The class is added to row at priorities marked in mask.
362  * It does nothing if mask == 0.
363  */
364 static inline void htb_add_class_to_row(struct htb_sched *q,
365                                         struct htb_class *cl, int mask)
366 {
367         q->row_mask[cl->level] |= mask;
368         while (mask) {
369                 int prio = ffz(~mask);
370                 mask &= ~(1 << prio);
371                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
372         }
373 }
374
375 /* If this triggers, it is a bug in this code, but it need not be fatal */
376 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
377 {
378         if (RB_EMPTY_NODE(rb)) {
379                 WARN_ON(1);
380         } else {
381                 rb_erase(rb, root);
382                 RB_CLEAR_NODE(rb);
383         }
384 }
385
386
387 /**
388  * htb_remove_class_from_row - removes class from its row
389  * @q: the priority event queue
390  * @cl: the class to remove
391  * @mask: bitmap of the given priorities in the class
392  *
393  * The class is removed from row at priorities marked in mask.
394  * It does nothing if mask == 0.
395  */
396 static inline void htb_remove_class_from_row(struct htb_sched *q,
397                                                  struct htb_class *cl, int mask)
398 {
399         int m = 0;
400         struct htb_level *hlevel = &q->hlevel[cl->level];
401
402         while (mask) {
403                 int prio = ffz(~mask);
404                 struct htb_prio *hprio = &hlevel->hprio[prio];
405
406                 mask &= ~(1 << prio);
407                 if (hprio->ptr == cl->node + prio)
408                         htb_next_rb_node(&hprio->ptr);
409
410                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
411                 if (!hprio->row.rb_node)
412                         m |= 1 << prio;
413         }
414         q->row_mask[cl->level] &= ~m;
415 }
416
417 /**
418  * htb_activate_prios - creates the active class's feed chain
419  * @q: the priority event queue
420  * @cl: the class to activate
421  *
422  * The class is connected to ancestors and/or appropriate rows
423  * for the priorities it participates in. cl->cmode must be the new
424  * (activated) mode. It does nothing if cl->prio_activity == 0.
425  */
426 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
427 {
428         struct htb_class *p = cl->parent;
429         long m, mask = cl->prio_activity;
430
431         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
432                 m = mask;
433                 while (m) {
434                         unsigned int prio = ffz(~m);
435
436                         if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
437                                 break;
438                         m &= ~(1 << prio);
439
440                         if (p->inner.clprio[prio].feed.rb_node)
441                                 /* parent already has its feed in use, so
442                                  * clear the bit in mask as the parent is already ok
443                                  */
444                                 mask &= ~(1 << prio);
445
446                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
447                 }
448                 p->prio_activity |= mask;
449                 cl = p;
450                 p = cl->parent;
451
452         }
453         if (cl->cmode == HTB_CAN_SEND && mask)
454                 htb_add_class_to_row(q, cl, mask);
455 }
456
457 /**
458  * htb_deactivate_prios - remove class from feed chain
459  * @q: the priority event queue
460  * @cl: the class to deactivate
461  *
462  * cl->cmode must represent the old mode (before deactivation). It does
463  * nothing if cl->prio_activity == 0. The class is removed from all feed
464  * chains and rows.
465  */
466 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
467 {
468         struct htb_class *p = cl->parent;
469         long m, mask = cl->prio_activity;
470
471         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
472                 m = mask;
473                 mask = 0;
474                 while (m) {
475                         int prio = ffz(~m);
476                         m &= ~(1 << prio);
477
478                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
479                                 /* we are removing a child which the parent
480                                  * feed points to - forget the pointer but
481                                  * remember the classid
482                                  */
483                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
484                                 p->inner.clprio[prio].ptr = NULL;
485                         }
486
487                         htb_safe_rb_erase(cl->node + prio,
488                                           &p->inner.clprio[prio].feed);
489
490                         if (!p->inner.clprio[prio].feed.rb_node)
491                                 mask |= 1 << prio;
492                 }
493
494                 p->prio_activity &= ~mask;
495                 cl = p;
496                 p = cl->parent;
497
498         }
499         if (cl->cmode == HTB_CAN_SEND && mask)
500                 htb_remove_class_from_row(q, cl, mask);
501 }
502
503 static inline s64 htb_lowater(const struct htb_class *cl)
504 {
505         if (htb_hysteresis)
506                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
507         else
508                 return 0;
509 }
510 static inline s64 htb_hiwater(const struct htb_class *cl)
511 {
512         if (htb_hysteresis)
513                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
514         else
515                 return 0;
516 }
517
518
519 /**
520  * htb_class_mode - computes and returns current class mode
521  * @cl: the target class
522  * @diff: diff time in nanoseconds
523  *
524  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
525  * is not HTB_CAN_SEND then *diff is updated to the time difference from
526  * now to the time when cl will change its state.
527  * It is also worth noting that the class mode doesn't change simply at
528  * cl->{c,}tokens == 0; rather there is a hysteresis range of
529  * 0 .. -cl->{c,}buffer. It is meant to limit the number of
530  * mode transitions per time unit. The speed gain is about 1/6.
531  */
532 static inline enum htb_cmode
533 htb_class_mode(struct htb_class *cl, s64 *diff)
534 {
535         s64 toks;
536
537         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
538                 *diff = -toks;
539                 return HTB_CANT_SEND;
540         }
541
542         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
543                 return HTB_CAN_SEND;
544
545         *diff = -toks;
546         return HTB_MAY_BORROW;
547 }
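/* Worked example (hysteresis off, so lowater == hiwater == 0; token values
 * are nanoseconds of transmit time at the configured rate/ceil):
 *
 *   cl->ctokens = -3,000,000   ceil bucket overdrawn by 3 ms
 *   *diff       =  1,000,000   1 ms elapsed since cl->t_c
 *
 *   toks = -3,000,000 + 1,000,000 = -2,000,000 < 0  =>  HTB_CANT_SEND and
 *   *diff becomes 2,000,000, i.e. the caller will park the class on the
 *   wait queue until the ceil bucket refills 2 ms from now.
 *
 * If the ceil check passes but cl->tokens + *diff is still negative, the
 * class may only borrow (HTB_MAY_BORROW); only when both buckets are
 * non-negative does it become HTB_CAN_SEND.
 */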
548
549 /**
550  * htb_change_class_mode - changes a class's mode
551  * @q: the priority event queue
552  * @cl: the target class
553  * @diff: diff time in nanoseconds
554  *
555  * This should be the only way to change a class's mode under normal
556  * circumstances. The routine will update the feed list linkage, change the
557  * mode and add the class to the wait event queue if appropriate. The new
558  * mode should be different from the old one and cl->pq_key has to be valid
559  * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
560  */
561 static void
562 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
563 {
564         enum htb_cmode new_mode = htb_class_mode(cl, diff);
565
566         if (new_mode == cl->cmode)
567                 return;
568
569         if (new_mode == HTB_CANT_SEND) {
570                 cl->overlimits++;
571                 q->overlimits++;
572         }
573
574         if (cl->prio_activity) {        /* not necessary: speed optimization */
575                 if (cl->cmode != HTB_CANT_SEND)
576                         htb_deactivate_prios(q, cl);
577                 cl->cmode = new_mode;
578                 if (new_mode != HTB_CANT_SEND)
579                         htb_activate_prios(q, cl);
580         } else
581                 cl->cmode = new_mode;
582 }
583
584 /**
585  * htb_activate - inserts leaf cl into appropriate active feeds
586  * @q: the priority event queue
587  * @cl: the target class
588  *
589  * The routine learns the (new) priority of the leaf and activates the
590  * feed chain for that prio. It can safely be called on an already
591  * active leaf.
592  */
593 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
594 {
595         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
596
597         if (!cl->prio_activity) {
598                 cl->prio_activity = 1 << cl->prio;
599                 htb_activate_prios(q, cl);
600         }
601 }
602
603 /**
604  * htb_deactivate - remove leaf cl from active feeds
605  * @q: the priority event queue
606  * @cl: the target class
607  *
608  * Make sure that the leaf is active. In other words it can't be
609  * called with a non-active leaf.
610  */
611 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
612 {
613         WARN_ON(!cl->prio_activity);
614
615         htb_deactivate_prios(q, cl);
616         cl->prio_activity = 0;
617 }
618
619 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
620                        struct sk_buff **to_free)
621 {
622         int ret;
623         unsigned int len = qdisc_pkt_len(skb);
624         struct htb_sched *q = qdisc_priv(sch);
625         struct htb_class *cl = htb_classify(skb, sch, &ret);
626
627         if (cl == HTB_DIRECT) {
628                 /* enqueue to helper queue */
629                 if (q->direct_queue.qlen < q->direct_qlen) {
630                         __qdisc_enqueue_tail(skb, &q->direct_queue);
631                         q->direct_pkts++;
632                 } else {
633                         return qdisc_drop(skb, sch, to_free);
634                 }
635 #ifdef CONFIG_NET_CLS_ACT
636         } else if (!cl) {
637                 if (ret & __NET_XMIT_BYPASS)
638                         qdisc_qstats_drop(sch);
639                 __qdisc_drop(skb, to_free);
640                 return ret;
641 #endif
642         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
643                                         to_free)) != NET_XMIT_SUCCESS) {
644                 if (net_xmit_drop_count(ret)) {
645                         qdisc_qstats_drop(sch);
646                         cl->drops++;
647                 }
648                 return ret;
649         } else {
650                 htb_activate(q, cl);
651         }
652
653         sch->qstats.backlog += len;
654         sch->q.qlen++;
655         return NET_XMIT_SUCCESS;
656 }
657
658 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
659 {
660         s64 toks = diff + cl->tokens;
661
662         if (toks > cl->buffer)
663                 toks = cl->buffer;
664         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
665         if (toks <= -cl->mbuffer)
666                 toks = 1 - cl->mbuffer;
667
668         cl->tokens = toks;
669 }
670
671 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
672 {
673         s64 toks = diff + cl->ctokens;
674
675         if (toks > cl->cbuffer)
676                 toks = cl->cbuffer;
677         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
678         if (toks <= -cl->mbuffer)
679                 toks = 1 - cl->mbuffer;
680
681         cl->ctokens = toks;
682 }
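/* Worked example of the accounting above (numbers are illustrative): with
 * cl->rate configured at 1 Mbit/s (125,000 bytes/s), psched_l2t_ns() prices
 * a 1500-byte packet at 1500 / 125,000 s = 12,000,000 ns, so each such
 * packet drains 12 ms worth of tokens. The elapsed time diff refills the
 * bucket, capped at cl->buffer, and the balance is clamped near -cl->mbuffer
 * so a long backlog cannot drive it arbitrarily negative.
 */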
683
684 /**
685  * htb_charge_class - charges amount "bytes" to leaf and ancestors
686  * @q: the priority event queue
687  * @cl: the class to start iterating from
688  * @level: the minimum level to account
689  * @skb: the socket buffer
690  *
691  * Routine assumes that packet "bytes" long was dequeued from leaf cl
692  * borrowing from "level". It accounts bytes to ceil leaky bucket for
693  * leaf and all ancestors and to rate bucket for ancestors at levels
694  * "level" and higher. It also handles possible change of mode resulting
695  * from the update. Note that mode can also increase here (MAY_BORROW to
696  * CAN_SEND) because we can use more precise clock that event queue here.
697  * In such case we remove class from event queue first.
698  */
699 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
700                              int level, struct sk_buff *skb)
701 {
702         int bytes = qdisc_pkt_len(skb);
703         enum htb_cmode old_mode;
704         s64 diff;
705
706         while (cl) {
707                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
708                 if (cl->level >= level) {
709                         if (cl->level == level)
710                                 cl->xstats.lends++;
711                         htb_accnt_tokens(cl, bytes, diff);
712                 } else {
713                         cl->xstats.borrows++;
714                         cl->tokens += diff;     /* we moved t_c; update tokens */
715                 }
716                 htb_accnt_ctokens(cl, bytes, diff);
717                 cl->t_c = q->now;
718
719                 old_mode = cl->cmode;
720                 diff = 0;
721                 htb_change_class_mode(q, cl, &diff);
722                 if (old_mode != cl->cmode) {
723                         if (old_mode != HTB_CAN_SEND)
724                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
725                         if (cl->cmode != HTB_CAN_SEND)
726                                 htb_add_to_wait_tree(q, cl, diff);
727                 }
728
729                 /* update basic stats except for leaves which are already updated */
730                 if (cl->level)
731                         bstats_update(&cl->bstats, skb);
732
733                 cl = cl->parent;
734         }
735 }
736
737 /**
738  * htb_do_events - make mode changes to classes at the level
739  * @q: the priority event queue
740  * @level: which wait_pq in 'q->hlevel'
741  * @start: start jiffies
742  *
743  * Scans event queue for pending events and applies them. Returns time of
744  * next pending event (0 for no event in pq, q->now for too many events).
745  * Note: Applied are events that have cl->pq_key <= q->now.
746  */
747 static s64 htb_do_events(struct htb_sched *q, const int level,
748                          unsigned long start)
749 {
750         /* don't run for longer than 2 jiffies; 2 is used instead of
751          * 1 to simplify things when jiffy is going to be incremented
752          * too soon
753          */
754         unsigned long stop_at = start + 2;
755         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
756
757         while (time_before(jiffies, stop_at)) {
758                 struct htb_class *cl;
759                 s64 diff;
760                 struct rb_node *p = rb_first(wait_pq);
761
762                 if (!p)
763                         return 0;
764
765                 cl = rb_entry(p, struct htb_class, pq_node);
766                 if (cl->pq_key > q->now)
767                         return cl->pq_key;
768
769                 htb_safe_rb_erase(p, wait_pq);
770                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
771                 htb_change_class_mode(q, cl, &diff);
772                 if (cl->cmode != HTB_CAN_SEND)
773                         htb_add_to_wait_tree(q, cl, diff);
774         }
775
776         /* too much load - let's continue after a break for scheduling */
777         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
778                 pr_warn("htb: too many events!\n");
779                 q->warned |= HTB_WARN_TOOMANYEVENTS;
780         }
781
782         return q->now;
783 }
784
785 /* Returns class->node+prio from the id-tree where the class's id is >= id.
786  * NULL if no such one exists.
787  */
788 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
789                                               u32 id)
790 {
791         struct rb_node *r = NULL;
792         while (n) {
793                 struct htb_class *cl =
794                     rb_entry(n, struct htb_class, node[prio]);
795
796                 if (id > cl->common.classid) {
797                         n = n->rb_right;
798                 } else if (id < cl->common.classid) {
799                         r = n;
800                         n = n->rb_left;
801                 } else {
802                         return n;
803                 }
804         }
805         return r;
806 }
807
808 /**
809  * htb_lookup_leaf - returns next leaf class in DRR order
810  * @hprio: the current htb_prio (row or inner feed) being searched
811  * @prio: which prio in class
812  *
813  * Find the leaf which the current feed pointer points to.
814  */
815 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
816 {
817         int i;
818         struct {
819                 struct rb_node *root;
820                 struct rb_node **pptr;
821                 u32 *pid;
822         } stk[TC_HTB_MAXDEPTH], *sp = stk;
823
824         BUG_ON(!hprio->row.rb_node);
825         sp->root = hprio->row.rb_node;
826         sp->pptr = &hprio->ptr;
827         sp->pid = &hprio->last_ptr_id;
828
829         for (i = 0; i < 65535; i++) {
830                 if (!*sp->pptr && *sp->pid) {
831                         /* ptr was invalidated but id is valid - try to recover
832                          * the original or next ptr
833                          */
834                         *sp->pptr =
835                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
836                 }
837                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
838                                  * can become out of date quickly
839                                  */
840                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
841                         *sp->pptr = sp->root;
842                         while ((*sp->pptr)->rb_left)
843                                 *sp->pptr = (*sp->pptr)->rb_left;
844                         if (sp > stk) {
845                                 sp--;
846                                 if (!*sp->pptr) {
847                                         WARN_ON(1);
848                                         return NULL;
849                                 }
850                                 htb_next_rb_node(sp->pptr);
851                         }
852                 } else {
853                         struct htb_class *cl;
854                         struct htb_prio *clp;
855
856                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
857                         if (!cl->level)
858                                 return cl;
859                         clp = &cl->inner.clprio[prio];
860                         (++sp)->root = clp->feed.rb_node;
861                         sp->pptr = &clp->ptr;
862                         sp->pid = &clp->last_ptr_id;
863                 }
864         }
865         WARN_ON(1);
866         return NULL;
867 }
868
869 /* dequeues packet at given priority and level; call only if
870  * you are sure that there is an active class at prio/level
871  */
872 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
873                                         const int level)
874 {
875         struct sk_buff *skb = NULL;
876         struct htb_class *cl, *start;
877         struct htb_level *hlevel = &q->hlevel[level];
878         struct htb_prio *hprio = &hlevel->hprio[prio];
879
880         /* look initial class up in the row */
881         start = cl = htb_lookup_leaf(hprio, prio);
882
883         do {
884 next:
885                 if (unlikely(!cl))
886                         return NULL;
887
888                 /* class can be empty - it is unlikely but can be true if leaf
889                  * qdisc drops packets in enqueue routine or if someone used
890                  * graft operation on the leaf since last dequeue;
891                  * simply deactivate and skip such class
892                  */
893                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
894                         struct htb_class *next;
895                         htb_deactivate(q, cl);
896
897                         /* row/level might become empty */
898                         if ((q->row_mask[level] & (1 << prio)) == 0)
899                                 return NULL;
900
901                         next = htb_lookup_leaf(hprio, prio);
902
903                         if (cl == start)        /* fix start if we just deleted it */
904                                 start = next;
905                         cl = next;
906                         goto next;
907                 }
908
909                 skb = cl->leaf.q->dequeue(cl->leaf.q);
910                 if (likely(skb != NULL))
911                         break;
912
913                 qdisc_warn_nonwc("htb", cl->leaf.q);
914                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
915                                          &q->hlevel[0].hprio[prio].ptr);
916                 cl = htb_lookup_leaf(hprio, prio);
917
918         } while (cl != start);
919
920         if (likely(skb != NULL)) {
921                 bstats_update(&cl->bstats, skb);
922                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
923                 if (cl->leaf.deficit[level] < 0) {
924                         cl->leaf.deficit[level] += cl->quantum;
925                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
926                                                  &q->hlevel[0].hprio[prio].ptr);
927                 }
928                 /* this used to be after charge_class but this constellation
929                  * gives us slightly better performance
930                  */
931                 if (!cl->leaf.q->q.qlen)
932                         htb_deactivate(q, cl);
933                 htb_charge_class(q, cl, level, skb);
934         }
935         return skb;
936 }
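/* Worked example of the deficit round-robin above (illustrative numbers):
 * with cl->quantum = 1500 and 1000-byte packets, the first dequeue drives
 * the deficit to -1000, so 1500 is added (deficit 500) and the pointer
 * advances; the next visit sends one packet (deficit 1000) and advances;
 * the one after that sends two packets before going negative again. Over
 * time each class transmits about quantum bytes per visit, which is how the
 * rate/rate2quantum-derived quantum weights siblings of equal prio.
 */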
937
938 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
939 {
940         struct sk_buff *skb;
941         struct htb_sched *q = qdisc_priv(sch);
942         int level;
943         s64 next_event;
944         unsigned long start_at;
945
946         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
947         skb = __qdisc_dequeue_head(&q->direct_queue);
948         if (skb != NULL) {
949 ok:
950                 qdisc_bstats_update(sch, skb);
951                 qdisc_qstats_backlog_dec(sch, skb);
952                 sch->q.qlen--;
953                 return skb;
954         }
955
956         if (!sch->q.qlen)
957                 goto fin;
958         q->now = ktime_get_ns();
959         start_at = jiffies;
960
961         next_event = q->now + 5LLU * NSEC_PER_SEC;
962
963         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
964                 /* common case optimization - skip event handler quickly */
965                 int m;
966                 s64 event = q->near_ev_cache[level];
967
968                 if (q->now >= event) {
969                         event = htb_do_events(q, level, start_at);
970                         if (!event)
971                                 event = q->now + NSEC_PER_SEC;
972                         q->near_ev_cache[level] = event;
973                 }
974
975                 if (next_event > event)
976                         next_event = event;
977
978                 m = ~q->row_mask[level];
979                 while (m != (int)(-1)) {
980                         int prio = ffz(m);
981
982                         m |= 1 << prio;
983                         skb = htb_dequeue_tree(q, prio, level);
984                         if (likely(skb != NULL))
985                                 goto ok;
986                 }
987         }
988         if (likely(next_event > q->now))
989                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
990         else
991                 schedule_work(&q->work);
992 fin:
993         return skb;
994 }
995
996 /* reset all classes */
997 /* always called under BH & queue lock */
998 static void htb_reset(struct Qdisc *sch)
999 {
1000         struct htb_sched *q = qdisc_priv(sch);
1001         struct htb_class *cl;
1002         unsigned int i;
1003
1004         for (i = 0; i < q->clhash.hashsize; i++) {
1005                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1006                         if (cl->level)
1007                                 memset(&cl->inner, 0, sizeof(cl->inner));
1008                         else {
1009                                 if (cl->leaf.q && !q->offload)
1010                                         qdisc_reset(cl->leaf.q);
1011                         }
1012                         cl->prio_activity = 0;
1013                         cl->cmode = HTB_CAN_SEND;
1014                 }
1015         }
1016         qdisc_watchdog_cancel(&q->watchdog);
1017         __qdisc_reset_queue(&q->direct_queue);
1018         memset(q->hlevel, 0, sizeof(q->hlevel));
1019         memset(q->row_mask, 0, sizeof(q->row_mask));
1020 }
1021
1022 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1023         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1024         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1025         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1027         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1028         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1029         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1030         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1031 };
1032
1033 static void htb_work_func(struct work_struct *work)
1034 {
1035         struct htb_sched *q = container_of(work, struct htb_sched, work);
1036         struct Qdisc *sch = q->watchdog.qdisc;
1037
1038         rcu_read_lock();
1039         __netif_schedule(qdisc_root(sch));
1040         rcu_read_unlock();
1041 }
1042
1043 static void htb_set_lockdep_class_child(struct Qdisc *q)
1044 {
1045         static struct lock_class_key child_key;
1046
1047         lockdep_set_class(qdisc_lock(q), &child_key);
1048 }
1049
1050 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1051 {
1052         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1053 }
1054
1055 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1056                     struct netlink_ext_ack *extack)
1057 {
1058         struct net_device *dev = qdisc_dev(sch);
1059         struct tc_htb_qopt_offload offload_opt;
1060         struct htb_sched *q = qdisc_priv(sch);
1061         struct nlattr *tb[TCA_HTB_MAX + 1];
1062         struct tc_htb_glob *gopt;
1063         unsigned int ntx;
1064         bool offload;
1065         int err;
1066
1067         qdisc_watchdog_init(&q->watchdog, sch);
1068         INIT_WORK(&q->work, htb_work_func);
1069
1070         if (!opt)
1071                 return -EINVAL;
1072
1073         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1074         if (err)
1075                 return err;
1076
1077         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1078                                           NULL);
1079         if (err < 0)
1080                 return err;
1081
1082         if (!tb[TCA_HTB_INIT])
1083                 return -EINVAL;
1084
1085         gopt = nla_data(tb[TCA_HTB_INIT]);
1086         if (gopt->version != HTB_VER >> 16)
1087                 return -EINVAL;
1088
1089         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1090
1091         if (offload) {
1092                 if (sch->parent != TC_H_ROOT) {
1093                         NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1094                         return -EOPNOTSUPP;
1095                 }
1096
1097                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1098                         NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1099                         return -EOPNOTSUPP;
1100                 }
1101
1102                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1103                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1104                                            sizeof(*q->direct_qdiscs),
1105                                            GFP_KERNEL);
1106                 if (!q->direct_qdiscs)
1107                         return -ENOMEM;
1108         }
1109
1110         err = qdisc_class_hash_init(&q->clhash);
1111         if (err < 0)
1112                 return err;
1113
1114         if (tb[TCA_HTB_DIRECT_QLEN])
1115                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1116         else
1117                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1118
1119         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1120                 q->rate2quantum = 1;
1121         q->defcls = gopt->defcls;
1122
1123         if (!offload)
1124                 return 0;
1125
1126         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1127                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1128                 struct Qdisc *qdisc;
1129
1130                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1131                                           TC_H_MAKE(sch->handle, 0), extack);
1132                 if (!qdisc) {
1133                         return -ENOMEM;
1134                 }
1135
1136                 htb_set_lockdep_class_child(qdisc);
1137                 q->direct_qdiscs[ntx] = qdisc;
1138                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1139         }
1140
1141         sch->flags |= TCQ_F_MQROOT;
1142
1143         offload_opt = (struct tc_htb_qopt_offload) {
1144                 .command = TC_HTB_CREATE,
1145                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1146                 .classid = TC_H_MIN(q->defcls),
1147                 .extack = extack,
1148         };
1149         err = htb_offload(dev, &offload_opt);
1150         if (err)
1151                 return err;
1152
1153         /* Defer this assignment, so that htb_destroy skips offload-related
1154          * parts (especially calling ndo_setup_tc) on errors.
1155          */
1156         q->offload = true;
1157
1158         return 0;
1159 }
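/* Illustrative setup for the offload path above (the device name and NIC
 * support are assumptions; the driver must handle TC_SETUP_QDISC_HTB):
 *
 *   ethtool -K eth0 hw-tc-offload on
 *   tc qdisc replace dev eth0 root handle 1: htb offload
 *
 * Without the offload flag, htb_init() returns before the per-txq loop and
 * HTB stays a purely software qdisc.
 */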
1160
1161 static void htb_attach_offload(struct Qdisc *sch)
1162 {
1163         struct net_device *dev = qdisc_dev(sch);
1164         struct htb_sched *q = qdisc_priv(sch);
1165         unsigned int ntx;
1166
1167         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1168                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1169
1170                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1171                 qdisc_put(old);
1172                 qdisc_hash_add(qdisc, false);
1173         }
1174         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1175                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1176                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1177
1178                 qdisc_put(old);
1179         }
1180
1181         kfree(q->direct_qdiscs);
1182         q->direct_qdiscs = NULL;
1183 }
1184
1185 static void htb_attach_software(struct Qdisc *sch)
1186 {
1187         struct net_device *dev = qdisc_dev(sch);
1188         unsigned int ntx;
1189
1190         /* Resemble qdisc_graft behavior. */
1191         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1192                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1193                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1194
1195                 qdisc_refcount_inc(sch);
1196
1197                 qdisc_put(old);
1198         }
1199 }
1200
1201 static void htb_attach(struct Qdisc *sch)
1202 {
1203         struct htb_sched *q = qdisc_priv(sch);
1204
1205         if (q->offload)
1206                 htb_attach_offload(sch);
1207         else
1208                 htb_attach_software(sch);
1209 }
1210
1211 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1212 {
1213         struct htb_sched *q = qdisc_priv(sch);
1214         struct nlattr *nest;
1215         struct tc_htb_glob gopt;
1216
1217         if (q->offload)
1218                 sch->flags |= TCQ_F_OFFLOADED;
1219         else
1220                 sch->flags &= ~TCQ_F_OFFLOADED;
1221
1222         sch->qstats.overlimits = q->overlimits;
1223         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1224          * no change can happen to the qdisc parameters.
1225          */
1226
1227         gopt.direct_pkts = q->direct_pkts;
1228         gopt.version = HTB_VER;
1229         gopt.rate2quantum = q->rate2quantum;
1230         gopt.defcls = q->defcls;
1231         gopt.debug = 0;
1232
1233         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1234         if (nest == NULL)
1235                 goto nla_put_failure;
1236         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1237             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1238                 goto nla_put_failure;
1239         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1240                 goto nla_put_failure;
1241
1242         return nla_nest_end(skb, nest);
1243
1244 nla_put_failure:
1245         nla_nest_cancel(skb, nest);
1246         return -1;
1247 }
1248
1249 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1250                           struct sk_buff *skb, struct tcmsg *tcm)
1251 {
1252         struct htb_class *cl = (struct htb_class *)arg;
1253         struct htb_sched *q = qdisc_priv(sch);
1254         struct nlattr *nest;
1255         struct tc_htb_opt opt;
1256
1257         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1258          * no change can happen to the class parameters.
1259          */
1260         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1261         tcm->tcm_handle = cl->common.classid;
1262         if (!cl->level && cl->leaf.q)
1263                 tcm->tcm_info = cl->leaf.q->handle;
1264
1265         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1266         if (nest == NULL)
1267                 goto nla_put_failure;
1268
1269         memset(&opt, 0, sizeof(opt));
1270
1271         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1272         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1273         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1274         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1275         opt.quantum = cl->quantum;
1276         opt.prio = cl->prio;
1277         opt.level = cl->level;
1278         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1279                 goto nla_put_failure;
1280         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1281                 goto nla_put_failure;
1282         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1283             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1284                               TCA_HTB_PAD))
1285                 goto nla_put_failure;
1286         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1287             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1288                               TCA_HTB_PAD))
1289                 goto nla_put_failure;
1290
1291         return nla_nest_end(skb, nest);
1292
1293 nla_put_failure:
1294         nla_nest_cancel(skb, nest);
1295         return -1;
1296 }
1297
1298 static void htb_offload_aggregate_stats(struct htb_sched *q,
1299                                         struct htb_class *cl)
1300 {
1301         u64 bytes = 0, packets = 0;
1302         struct htb_class *c;
1303         unsigned int i;
1304
1305         gnet_stats_basic_sync_init(&cl->bstats);
1306
1307         for (i = 0; i < q->clhash.hashsize; i++) {
1308                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1309                         struct htb_class *p = c;
1310
1311                         while (p && p->level < cl->level)
1312                                 p = p->parent;
1313
1314                         if (p != cl)
1315                                 continue;
1316
1317                         bytes += u64_stats_read(&c->bstats_bias.bytes);
1318                         packets += u64_stats_read(&c->bstats_bias.packets);
1319                         if (c->level == 0) {
1320                                 bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1321                                 packets += u64_stats_read(&c->leaf.q->bstats.packets);
1322                         }
1323                 }
1324         }
1325         _bstats_update(&cl->bstats, bytes, packets);
1326 }
1327
1328 static int
1329 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1330 {
1331         struct htb_class *cl = (struct htb_class *)arg;
1332         struct htb_sched *q = qdisc_priv(sch);
1333         struct gnet_stats_queue qs = {
1334                 .drops = cl->drops,
1335                 .overlimits = cl->overlimits,
1336         };
1337         __u32 qlen = 0;
1338
1339         if (!cl->level && cl->leaf.q)
1340                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1341
1342         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1343                                     INT_MIN, INT_MAX);
1344         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1345                                      INT_MIN, INT_MAX);
1346
1347         if (q->offload) {
1348                 if (!cl->level) {
1349                         if (cl->leaf.q)
1350                                 cl->bstats = cl->leaf.q->bstats;
1351                         else
1352                                 gnet_stats_basic_sync_init(&cl->bstats);
1353                         _bstats_update(&cl->bstats,
1354                                        u64_stats_read(&cl->bstats_bias.bytes),
1355                                        u64_stats_read(&cl->bstats_bias.packets));
1356                 } else {
1357                         htb_offload_aggregate_stats(q, cl);
1358                 }
1359         }
1360
1361         if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1362             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1363             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1364                 return -1;
1365
1366         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1367 }
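/* The xstats filled in above are reported to userspace as the app-specific
 * stats blob; with iproute2 they show up as, e.g. (illustrative):
 *
 *   tc -s -d class show dev eth0
 *
 * which prints the lended/borrowed/giants counters and the token/ctoken
 * snapshots per class.
 */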
1368
1369 static struct netdev_queue *
1370 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1371 {
1372         struct net_device *dev = qdisc_dev(sch);
1373         struct tc_htb_qopt_offload offload_opt;
1374         struct htb_sched *q = qdisc_priv(sch);
1375         int err;
1376
1377         if (!q->offload)
1378                 return sch->dev_queue;
1379
1380         offload_opt = (struct tc_htb_qopt_offload) {
1381                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1382                 .classid = TC_H_MIN(tcm->tcm_parent),
1383         };
1384         err = htb_offload(dev, &offload_opt);
1385         if (err || offload_opt.qid >= dev->num_tx_queues)
1386                 return NULL;
1387         return netdev_get_tx_queue(dev, offload_opt.qid);
1388 }
1389
1390 static struct Qdisc *
1391 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1392 {
1393         struct net_device *dev = dev_queue->dev;
1394         struct Qdisc *old_q;
1395
1396         if (dev->flags & IFF_UP)
1397                 dev_deactivate(dev);
1398         old_q = dev_graft_qdisc(dev_queue, new_q);
1399         if (new_q)
1400                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1401         if (dev->flags & IFF_UP)
1402                 dev_activate(dev);
1403
1404         return old_q;
1405 }
1406
1407 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1408 {
1409         struct netdev_queue *queue;
1410
1411         queue = cl->leaf.offload_queue;
1412         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1413                 WARN_ON(cl->leaf.q->dev_queue != queue);
1414
1415         return queue;
1416 }
1417
1418 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1419                                    struct htb_class *cl_new, bool destroying)
1420 {
1421         struct netdev_queue *queue_old, *queue_new;
1422         struct net_device *dev = qdisc_dev(sch);
1423
1424         queue_old = htb_offload_get_queue(cl_old);
1425         queue_new = htb_offload_get_queue(cl_new);
1426
1427         if (!destroying) {
1428                 struct Qdisc *qdisc;
1429
1430                 if (dev->flags & IFF_UP)
1431                         dev_deactivate(dev);
1432                 qdisc = dev_graft_qdisc(queue_old, NULL);
1433                 WARN_ON(qdisc != cl_old->leaf.q);
1434         }
1435
1436         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1437                 cl_old->leaf.q->dev_queue = queue_new;
1438         cl_old->leaf.offload_queue = queue_new;
1439
1440         if (!destroying) {
1441                 struct Qdisc *qdisc;
1442
1443                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1444                 if (dev->flags & IFF_UP)
1445                         dev_activate(dev);
1446                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1447         }
1448 }
1449
1450 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1451                      struct Qdisc **old, struct netlink_ext_ack *extack)
1452 {
1453         struct netdev_queue *dev_queue = sch->dev_queue;
1454         struct htb_class *cl = (struct htb_class *)arg;
1455         struct htb_sched *q = qdisc_priv(sch);
1456         struct Qdisc *old_q;
1457
1458         if (cl->level)
1459                 return -EINVAL;
1460
1461         if (q->offload)
1462                 dev_queue = htb_offload_get_queue(cl);
1463
1464         if (!new) {
1465                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1466                                         cl->common.classid, extack);
1467                 if (!new)
1468                         return -ENOBUFS;
1469         }
1470
1471         if (q->offload) {
1472                 htb_set_lockdep_class_child(new);
1473                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1474                 qdisc_refcount_inc(new);
1475                 old_q = htb_graft_helper(dev_queue, new);
1476         }
1477
1478         *old = qdisc_replace(sch, new, &cl->leaf.q);
1479
1480         if (q->offload) {
1481                 WARN_ON(old_q != *old);
1482                 qdisc_put(old_q);
1483         }
1484
1485         return 0;
1486 }
1487
1488 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1489 {
1490         struct htb_class *cl = (struct htb_class *)arg;
1491         return !cl->level ? cl->leaf.q : NULL;
1492 }
1493
1494 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1495 {
1496         struct htb_class *cl = (struct htb_class *)arg;
1497
1498         htb_deactivate(qdisc_priv(sch), cl);
1499 }
1500
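/* htb_parent_last_child - true if @cl is the only child of its parent,
 * i.e. deleting it would turn the parent back into a leaf.
 */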
1501 static inline int htb_parent_last_child(struct htb_class *cl)
1502 {
1503         if (!cl->parent)
1504                 /* the root class */
1505                 return 0;
1506         if (cl->parent->children > 1)
1507                 /* not the last child */
1508                 return 0;
1509         return 1;
1510 }
1511
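/* htb_parent_to_leaf - convert @cl's parent back into a leaf after its
 * last child is removed: take it off the wait queue if it cannot send,
 * reset its level and tokens, and attach @new_q (or noop_qdisc) as its
 * leaf qdisc.
 */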
1512 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1513                                struct Qdisc *new_q)
1514 {
1515         struct htb_sched *q = qdisc_priv(sch);
1516         struct htb_class *parent = cl->parent;
1517
1518         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1519
1520         if (parent->cmode != HTB_CAN_SEND)
1521                 htb_safe_rb_erase(&parent->pq_node,
1522                                   &q->hlevel[parent->level].wait_pq);
1523
1524         parent->level = 0;
1525         memset(&parent->inner, 0, sizeof(parent->inner));
1526         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1527         parent->tokens = parent->buffer;
1528         parent->ctokens = parent->cbuffer;
1529         parent->t_c = ktime_get_ns();
1530         parent->cmode = HTB_CAN_SEND;
1531         if (q->offload)
1532                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1533 }
1534
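/* htb_parent_to_leaf_offload - graft the parent's replacement leaf qdisc
 * onto its TX queue. As in htb_graft, an extra reference is taken because
 * the qdisc is referenced both as cl->leaf.q and as dev_queue->qdisc.
 */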
1535 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1536                                        struct netdev_queue *dev_queue,
1537                                        struct Qdisc *new_q)
1538 {
1539         struct Qdisc *old_q;
1540
1541         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1542         if (new_q)
1543                 qdisc_refcount_inc(new_q);
1544         old_q = htb_graft_helper(dev_queue, new_q);
1545         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1546 }
1547
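/* htb_destroy_class_offload - notify the driver that a leaf class is going
 * away (TC_HTB_LEAF_DEL*). The leaf's byte/packet counters are folded into
 * the parent's bstats_bias so accounting survives the deletion, and if the
 * driver reports that another classid now owns the freed queue, that
 * class's qdisc is moved over.
 */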
1548 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1549                                      bool last_child, bool destroying,
1550                                      struct netlink_ext_ack *extack)
1551 {
1552         struct tc_htb_qopt_offload offload_opt;
1553         struct netdev_queue *dev_queue;
1554         struct Qdisc *q = cl->leaf.q;
1555         struct Qdisc *old;
1556         int err;
1557
1558         if (cl->level)
1559                 return -EINVAL;
1560
1561         WARN_ON(!q);
1562         dev_queue = htb_offload_get_queue(cl);
1563         /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1564          * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1565          * does not need to graft or qdisc_put the qdisc being destroyed.
1566          */
1567         if (!destroying) {
1568                 old = htb_graft_helper(dev_queue, NULL);
1569                 /* Last qdisc grafted should be the same as cl->leaf.q when
1570                  * calling htb_delete.
1571                  */
1572                 WARN_ON(old != q);
1573         }
1574
1575         if (cl->parent) {
1576                 _bstats_update(&cl->parent->bstats_bias,
1577                                u64_stats_read(&q->bstats.bytes),
1578                                u64_stats_read(&q->bstats.packets));
1579         }
1580
1581         offload_opt = (struct tc_htb_qopt_offload) {
1582                 .command = !last_child ? TC_HTB_LEAF_DEL :
1583                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1584                            TC_HTB_LEAF_DEL_LAST,
1585                 .classid = cl->common.classid,
1586                 .extack = extack,
1587         };
1588         err = htb_offload(qdisc_dev(sch), &offload_opt);
1589
1590         if (!destroying) {
1591                 if (!err)
1592                         qdisc_put(old);
1593                 else
1594                         htb_graft_helper(dev_queue, old);
1595         }
1596
1597         if (last_child)
1598                 return err;
1599
1600         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1601                 u32 classid = TC_H_MAJ(sch->handle) |
1602                               TC_H_MIN(offload_opt.classid);
1603                 struct htb_class *moved_cl = htb_find(classid, sch);
1604
1605                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1606         }
1607
1608         return err;
1609 }
1610
1611 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1612 {
1613         if (!cl->level) {
1614                 WARN_ON(!cl->leaf.q);
1615                 qdisc_put(cl->leaf.q);
1616         }
1617         gen_kill_estimator(&cl->rate_est);
1618         tcf_block_put(cl->block);
1619         kfree(cl);
1620 }
1621
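/* htb_destroy - tear down the whole qdisc: cancel the pending work and
 * watchdog, put all filter blocks first (filters need their target classes
 * alive to unbind), then free the classes. With offload, leaves are
 * destroyed before their parents, looping until the hierarchy is empty,
 * and TC_HTB_DESTROY is finally sent to the driver.
 */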
1622 static void htb_destroy(struct Qdisc *sch)
1623 {
1624         struct net_device *dev = qdisc_dev(sch);
1625         struct tc_htb_qopt_offload offload_opt;
1626         struct htb_sched *q = qdisc_priv(sch);
1627         struct hlist_node *next;
1628         bool nonempty, changed;
1629         struct htb_class *cl;
1630         unsigned int i;
1631
1632         cancel_work_sync(&q->work);
1633         qdisc_watchdog_cancel(&q->watchdog);
1634         /* This call used to come after the htb_destroy_class loop below
1635          * and, surprisingly, that worked in 2.4. It must precede it,
1636          * however, because filters need their target class alive to be
1637          * able to call unbind_filter on it (without an Oops).
1638          */
1639         tcf_block_put(q->block);
1640
1641         for (i = 0; i < q->clhash.hashsize; i++) {
1642                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1643                         tcf_block_put(cl->block);
1644                         cl->block = NULL;
1645                 }
1646         }
1647
1648         do {
1649                 nonempty = false;
1650                 changed = false;
1651                 for (i = 0; i < q->clhash.hashsize; i++) {
1652                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1653                                                   common.hnode) {
1654                                 bool last_child;
1655
1656                                 if (!q->offload) {
1657                                         htb_destroy_class(sch, cl);
1658                                         continue;
1659                                 }
1660
1661                                 nonempty = true;
1662
1663                                 if (cl->level)
1664                                         continue;
1665
1666                                 changed = true;
1667
1668                                 last_child = htb_parent_last_child(cl);
1669                                 htb_destroy_class_offload(sch, cl, last_child,
1670                                                           true, NULL);
1671                                 qdisc_class_hash_remove(&q->clhash,
1672                                                         &cl->common);
1673                                 if (cl->parent)
1674                                         cl->parent->children--;
1675                                 if (last_child)
1676                                         htb_parent_to_leaf(sch, cl, NULL);
1677                                 htb_destroy_class(sch, cl);
1678                         }
1679                 }
1680         } while (changed);
1681         WARN_ON(nonempty);
1682
1683         qdisc_class_hash_destroy(&q->clhash);
1684         __qdisc_reset_queue(&q->direct_queue);
1685
1686         if (q->offload) {
1687                 offload_opt = (struct tc_htb_qopt_offload) {
1688                         .command = TC_HTB_DESTROY,
1689                 };
1690                 htb_offload(dev, &offload_opt);
1691         }
1692
1693         if (!q->direct_qdiscs)
1694                 return;
1695         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1696                 qdisc_put(q->direct_qdiscs[i]);
1697         kfree(q->direct_qdiscs);
1698 }
1699
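/* htb_delete - Qdisc_class_ops ->delete: remove a single class. Classes
 * with children or bound filters are busy. If the class is its parent's
 * last child, a replacement pfifo is created outside the tree lock so the
 * parent can be turned back into a leaf.
 */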
1700 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1701                       struct netlink_ext_ack *extack)
1702 {
1703         struct htb_sched *q = qdisc_priv(sch);
1704         struct htb_class *cl = (struct htb_class *)arg;
1705         struct Qdisc *new_q = NULL;
1706         int last_child = 0;
1707         int err;
1708
1709         /* TODO: why don't we allow deleting a subtree? References? Does
1710          * the tc subsystem guarantee that no class refs are held when
1711          * htb_destroy runs, so that we can remove children safely there?
1712          */
1713         if (cl->children || cl->filter_cnt)
1714                 return -EBUSY;
1715
1716         if (!cl->level && htb_parent_last_child(cl))
1717                 last_child = 1;
1718
1719         if (q->offload) {
1720                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1721                                                 extack);
1722                 if (err)
1723                         return err;
1724         }
1725
1726         if (last_child) {
1727                 struct netdev_queue *dev_queue = sch->dev_queue;
1728
1729                 if (q->offload)
1730                         dev_queue = htb_offload_get_queue(cl);
1731
1732                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1733                                           cl->parent->common.classid,
1734                                           NULL);
1735                 if (q->offload) {
1736                         if (new_q)
1737                                 htb_set_lockdep_class_child(new_q);
1738                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1739                 }
1740         }
1741
1742         sch_tree_lock(sch);
1743
1744         if (!cl->level)
1745                 qdisc_purge_queue(cl->leaf.q);
1746
1747         /* delete from the hash and the active lists; the rest is done in htb_destroy_class */
1748         qdisc_class_hash_remove(&q->clhash, &cl->common);
1749         if (cl->parent)
1750                 cl->parent->children--;
1751
1752         if (cl->prio_activity)
1753                 htb_deactivate(q, cl);
1754
1755         if (cl->cmode != HTB_CAN_SEND)
1756                 htb_safe_rb_erase(&cl->pq_node,
1757                                   &q->hlevel[cl->level].wait_pq);
1758
1759         if (last_child)
1760                 htb_parent_to_leaf(sch, cl, new_q);
1761
1762         sch_tree_unlock(sch);
1763
1764         htb_destroy_class(sch, cl);
1765         return 0;
1766 }
1767
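/* htb_change_class - Qdisc_class_ops ->change: create a new class or
 * modify an existing one. Parses the TCA_HTB_* attributes, rejects options
 * the offload cannot honour, creates the leaf qdisc outside the tree lock
 * and, for offload, issues TC_HTB_LEAF_ALLOC_QUEUE, TC_HTB_LEAF_TO_INNER
 * or TC_HTB_NODE_MODIFY before the class parameters are updated.
 */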
1768 static int htb_change_class(struct Qdisc *sch, u32 classid,
1769                             u32 parentid, struct nlattr **tca,
1770                             unsigned long *arg, struct netlink_ext_ack *extack)
1771 {
1772         int err = -EINVAL;
1773         struct htb_sched *q = qdisc_priv(sch);
1774         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1775         struct tc_htb_qopt_offload offload_opt;
1776         struct nlattr *opt = tca[TCA_OPTIONS];
1777         struct nlattr *tb[TCA_HTB_MAX + 1];
1778         struct Qdisc *parent_qdisc = NULL;
1779         struct netdev_queue *dev_queue;
1780         struct tc_htb_opt *hopt;
1781         u64 rate64, ceil64;
1782         int warn = 0;
1783
1784         /* extract all subattrs from opt attr */
1785         if (!opt)
1786                 goto failure;
1787
1788         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1789                                           extack);
1790         if (err < 0)
1791                 goto failure;
1792
1793         err = -EINVAL;
1794         if (tb[TCA_HTB_PARMS] == NULL)
1795                 goto failure;
1796
1797         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1798
1799         hopt = nla_data(tb[TCA_HTB_PARMS]);
1800         if (!hopt->rate.rate || !hopt->ceil.rate)
1801                 goto failure;
1802
1803         if (q->offload) {
1804                 /* Options not supported by the offload. */
1805                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1806                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1807                         goto failure;
1808                 }
1809                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1810                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1811                         goto failure;
1812                 }
1813                 if (hopt->quantum) {
1814                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1815                         goto failure;
1816                 }
1817         }
1818
1819         /* Keep backward compatibility with rate_table based iproute2 tc */
1820         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1821                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1822                                               NULL));
1823
1824         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1825                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1826                                               NULL));
1827
1828         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1829         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1830
1831         if (!cl) {              /* new class */
1832                 struct net_device *dev = qdisc_dev(sch);
1833                 struct Qdisc *new_q, *old_q;
1834                 int prio;
1835                 struct {
1836                         struct nlattr           nla;
1837                         struct gnet_estimator   opt;
1838                 } est = {
1839                         .nla = {
1840                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1841                                 .nla_type       = TCA_RATE,
1842                         },
1843                         .opt = {
1844                                 /* 4s interval, 16s averaging constant */
1845                                 .interval       = 2,
1846                                 .ewma_log       = 2,
1847                         },
1848                 };
1849
1850                 /* check for valid classid */
1851                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1852                     htb_find(classid, sch))
1853                         goto failure;
1854
1855                 /* check maximal depth */
1856                 if (parent && parent->parent && parent->parent->level < 2) {
1857                         NL_SET_ERR_MSG_MOD(extack, "tree is too deep");
1858                         goto failure;
1859                 }
1860                 err = -ENOBUFS;
1861                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1862                 if (!cl)
1863                         goto failure;
1864
1865                 gnet_stats_basic_sync_init(&cl->bstats);
1866                 gnet_stats_basic_sync_init(&cl->bstats_bias);
1867
1868                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1869                 if (err) {
1870                         kfree(cl);
1871                         goto failure;
1872                 }
1873                 if (htb_rate_est || tca[TCA_RATE]) {
1874                         err = gen_new_estimator(&cl->bstats, NULL,
1875                                                 &cl->rate_est,
1876                                                 NULL,
1877                                                 true,
1878                                                 tca[TCA_RATE] ? : &est.nla);
1879                         if (err)
1880                                 goto err_block_put;
1881                 }
1882
1883                 cl->children = 0;
1884                 RB_CLEAR_NODE(&cl->pq_node);
1885
1886                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1887                         RB_CLEAR_NODE(&cl->node[prio]);
1888
1889                 cl->common.classid = classid;
1890
1891                 /* Make sure nothing interrupts us between the two
1892                  * ndo_setup_tc calls.
1893                  */
1894                 ASSERT_RTNL();
1895
1896                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1897                  * which cannot be called while holding sch_tree_lock
1898                  * -- thanks to Karlis Peisenieks
1899                  */
1900                 if (!q->offload) {
1901                         dev_queue = sch->dev_queue;
1902                 } else if (!(parent && !parent->level)) {
1903                         /* Assign a dev_queue to this classid. */
1904                         offload_opt = (struct tc_htb_qopt_offload) {
1905                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1906                                 .classid = cl->common.classid,
1907                                 .parent_classid = parent ?
1908                                         TC_H_MIN(parent->common.classid) :
1909                                         TC_HTB_CLASSID_ROOT,
1910                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1911                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1912                                 .prio = hopt->prio,
1913                                 .extack = extack,
1914                         };
1915                         err = htb_offload(dev, &offload_opt);
1916                         if (err) {
1917                                 NL_SET_ERR_MSG_WEAK(extack,
1918                                                     "Failed to offload TC_HTB_LEAF_ALLOC_QUEUE");
1919                                 goto err_kill_estimator;
1920                         }
1921                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1922                 } else { /* First child. */
1923                         dev_queue = htb_offload_get_queue(parent);
1924                         old_q = htb_graft_helper(dev_queue, NULL);
1925                         WARN_ON(old_q != parent->leaf.q);
1926                         offload_opt = (struct tc_htb_qopt_offload) {
1927                                 .command = TC_HTB_LEAF_TO_INNER,
1928                                 .classid = cl->common.classid,
1929                                 .parent_classid =
1930                                         TC_H_MIN(parent->common.classid),
1931                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1932                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1933                                 .prio = hopt->prio,
1934                                 .extack = extack,
1935                         };
1936                         err = htb_offload(dev, &offload_opt);
1937                         if (err) {
1938                                 NL_SET_ERR_MSG_WEAK(extack,
1939                                                     "Failed to offload TC_HTB_LEAF_TO_INNER");
1940                                 htb_graft_helper(dev_queue, old_q);
1941                                 goto err_kill_estimator;
1942                         }
1943                         _bstats_update(&parent->bstats_bias,
1944                                        u64_stats_read(&old_q->bstats.bytes),
1945                                        u64_stats_read(&old_q->bstats.packets));
1946                         qdisc_put(old_q);
1947                 }
1948                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1949                                           classid, NULL);
1950                 if (q->offload) {
1951                         if (new_q) {
1952                                 htb_set_lockdep_class_child(new_q);
1953                                 /* One ref for cl->leaf.q, the other for
1954                                  * dev_queue->qdisc.
1955                                  */
1956                                 qdisc_refcount_inc(new_q);
1957                         }
1958                         old_q = htb_graft_helper(dev_queue, new_q);
1959                         /* No qdisc_put needed. */
1960                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1961                 }
1962                 sch_tree_lock(sch);
1963                 if (parent && !parent->level) {
1964                         /* turn parent into inner node */
1965                         qdisc_purge_queue(parent->leaf.q);
1966                         parent_qdisc = parent->leaf.q;
1967                         if (parent->prio_activity)
1968                                 htb_deactivate(q, parent);
1969
1970                         /* remove from the event list because of the level change */
1971                         if (parent->cmode != HTB_CAN_SEND) {
1972                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1973                                 parent->cmode = HTB_CAN_SEND;
1974                         }
1975                         parent->level = (parent->parent ? parent->parent->level
1976                                          : TC_HTB_MAXDEPTH) - 1;
1977                         memset(&parent->inner, 0, sizeof(parent->inner));
1978                 }
1979
1980                 /* the leaf (this class) needs an elementary qdisc */
1981                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1982                 if (q->offload)
1983                         cl->leaf.offload_queue = dev_queue;
1984
1985                 cl->parent = parent;
1986
1987                 /* set class to be in HTB_CAN_SEND state */
1988                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1989                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1990                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1991                 cl->t_c = ktime_get_ns();
1992                 cl->cmode = HTB_CAN_SEND;
1993
1994                 /* attach to the hash list and parent's family */
1995                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1996                 if (parent)
1997                         parent->children++;
1998                 if (cl->leaf.q != &noop_qdisc)
1999                         qdisc_hash_add(cl->leaf.q, true);
2000         } else {
2001                 if (tca[TCA_RATE]) {
2002                         err = gen_replace_estimator(&cl->bstats, NULL,
2003                                                     &cl->rate_est,
2004                                                     NULL,
2005                                                     true,
2006                                                     tca[TCA_RATE]);
2007                         if (err)
2008                                 return err;
2009                 }
2010
2011                 if (q->offload) {
2012                         struct net_device *dev = qdisc_dev(sch);
2013
2014                         offload_opt = (struct tc_htb_qopt_offload) {
2015                                 .command = TC_HTB_NODE_MODIFY,
2016                                 .classid = cl->common.classid,
2017                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2018                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2019                                 .prio = hopt->prio,
2020                                 .extack = extack,
2021                         };
2022                         err = htb_offload(dev, &offload_opt);
2023                         if (err)
2024                                 /* Estimator was replaced, and rollback may fail
2025                                  * as well, so we don't try to recover it, and
2026                                  * the estimator won't work properly with the
2027                                  * offload anyway, because bstats are updated
2028                                  * only when the stats are queried.
2029                                  */
2030                                 return err;
2031                 }
2032
2033                 sch_tree_lock(sch);
2034         }
2035
2036         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2037         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2038
2039         /* there used to be a nasty bug here: we must check that the node
2040          * is really a leaf before changing cl->leaf!
2041          */
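        /* Derive the quantum (per-round byte budget) from the class rate
         * divided by r2q, clamped to [1000, 200000] bytes unless the user
         * supplied an explicit quantum. For example, with r2q = 10 a class
         * rated at 1,250,000 bytes/s (10 Mbit/s) gets a quantum of
         * 125,000 bytes.
         */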
2042         if (!cl->level) {
2043                 u64 quantum = cl->rate.rate_bytes_ps;
2044
2045                 do_div(quantum, q->rate2quantum);
2046                 cl->quantum = min_t(u64, quantum, INT_MAX);
2047
2048                 if (!hopt->quantum && cl->quantum < 1000) {
2049                         warn = -1;
2050                         cl->quantum = 1000;
2051                 }
2052                 if (!hopt->quantum && cl->quantum > 200000) {
2053                         warn = 1;
2054                         cl->quantum = 200000;
2055                 }
2056                 if (hopt->quantum)
2057                         cl->quantum = hopt->quantum;
2058                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2059                         cl->prio = TC_HTB_NUMPRIO - 1;
2060         }
2061
2062         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2063         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2064
2065         sch_tree_unlock(sch);
2066         qdisc_put(parent_qdisc);
2067
2068         if (warn)
2069                 NL_SET_ERR_MSG_FMT_MOD(extack,
2070                                        "quantum of class %X is %s. Consider r2q change.",
2071                                        cl->common.classid, (warn == -1 ? "small" : "big"));
2072
2073         qdisc_class_hash_grow(sch, &q->clhash);
2074
2075         *arg = (unsigned long)cl;
2076         return 0;
2077
2078 err_kill_estimator:
2079         gen_kill_estimator(&cl->rate_est);
2080 err_block_put:
2081         tcf_block_put(cl->block);
2082         kfree(cl);
2083 failure:
2084         return err;
2085 }
2086
2087 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2088                                        struct netlink_ext_ack *extack)
2089 {
2090         struct htb_sched *q = qdisc_priv(sch);
2091         struct htb_class *cl = (struct htb_class *)arg;
2092
2093         return cl ? cl->block : q->block;
2094 }
2095
2096 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2097                                      u32 classid)
2098 {
2099         struct htb_class *cl = htb_find(classid, sch);
2100
2101         /*if (cl && !cl->level) return 0;
2102          * The line above used to be here to prevent attaching filters to
2103          * leaves. But at least the tc_index filter uses this just to look
2104          * up a class for other reasons, so we have to allow it.
2105          * ----
2106          * 19.6.2002 As Werner explained, this is OK - binding a filter is
2107          * just another way to "lock" the class - unlike "get", this lock
2108          * can be broken by the class during destroy, IIUC.
2109          */
2110         if (cl)
2111                 cl->filter_cnt++;
2112         return (unsigned long)cl;
2113 }
2114
2115 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2116 {
2117         struct htb_class *cl = (struct htb_class *)arg;
2118
2119         if (cl)
2120                 cl->filter_cnt--;
2121 }
2122
2123 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2124 {
2125         struct htb_sched *q = qdisc_priv(sch);
2126         struct htb_class *cl;
2127         unsigned int i;
2128
2129         if (arg->stop)
2130                 return;
2131
2132         for (i = 0; i < q->clhash.hashsize; i++) {
2133                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2134                         if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2135                                 return;
2136                 }
2137         }
2138 }
2139
2140 static const struct Qdisc_class_ops htb_class_ops = {
2141         .select_queue   =       htb_select_queue,
2142         .graft          =       htb_graft,
2143         .leaf           =       htb_leaf,
2144         .qlen_notify    =       htb_qlen_notify,
2145         .find           =       htb_search,
2146         .change         =       htb_change_class,
2147         .delete         =       htb_delete,
2148         .walk           =       htb_walk,
2149         .tcf_block      =       htb_tcf_block,
2150         .bind_tcf       =       htb_bind_filter,
2151         .unbind_tcf     =       htb_unbind_filter,
2152         .dump           =       htb_dump_class,
2153         .dump_stats     =       htb_dump_class_stats,
2154 };
2155
2156 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2157         .cl_ops         =       &htb_class_ops,
2158         .id             =       "htb",
2159         .priv_size      =       sizeof(struct htb_sched),
2160         .enqueue        =       htb_enqueue,
2161         .dequeue        =       htb_dequeue,
2162         .peek           =       qdisc_peek_dequeued,
2163         .init           =       htb_init,
2164         .attach         =       htb_attach,
2165         .reset          =       htb_reset,
2166         .destroy        =       htb_destroy,
2167         .dump           =       htb_dump,
2168         .owner          =       THIS_MODULE,
2169 };
2170
2171 static int __init htb_module_init(void)
2172 {
2173         return register_qdisc(&htb_qdisc_ops);
2174 }
2175 static void __exit htb_module_exit(void)
2176 {
2177         unregister_qdisc(&htb_qdisc_ops);
2178 }
2179
2180 module_init(htb_module_init)
2181 module_exit(htb_module_exit)
2182 MODULE_LICENSE("GPL");