1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
4  *
5  * Authors:     Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *                      HTB support at LARTC mailing list
10  *              Ondrej Kraus, <krauso@barr.cz>
11  *                      found missing INIT_QDISC(htb)
12  *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *                      helped a lot to locate nasty class stall bug
14  *              Andi Kleen, Jamal Hadi, Bert Hubert
15  *                      code review and helpful comments on shaping
16  *              Tomasz Wrona, <tw@eter.tym.pl>
17  *                      created test case so that I was able to fix nasty bug
18  *              Wilfried Weissmann
19  *                      spotted bug in dequeue code and helped with fix
20  *              Jiri Fojtasek
21  *                      fixed requeue routine
22  *              and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows assigning a priority to each class in the hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47
48     Levels:
49     Each class is assigned a level. A leaf ALWAYS has level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
51     one less than their parent's.
52 */
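
/* A small sketch of the level assignment described above (illustrative only,
 * the class names are made up):
 *
 *      root            level TC_HTB_MAXDEPTH-1
 *      `-- inner       level TC_HTB_MAXDEPTH-2
 *          `-- leaf    level 0
 */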
53
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
56
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64
65 static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
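
/* Usage note (illustrative): thanks to the non-zero permission bits, both
 * knobs above are also exported through sysfs and can be read or (as root)
 * changed at runtime, e.g. under
 *      /sys/module/sch_htb/parameters/htb_hysteresis
 *      /sys/module/sch_htb/parameters/htb_rate_est
 */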
68
69 /* used internally to keep the status of a single class */
70 enum htb_cmode {
71         HTB_CANT_SEND,          /* class can't send and can't borrow */
72         HTB_MAY_BORROW,         /* class can't send but may borrow */
73         HTB_CAN_SEND            /* class can send */
74 };
75
76 struct htb_prio {
77         union {
78                 struct rb_root  row;
79                 struct rb_root  feed;
80         };
81         struct rb_node  *ptr;
82         /* When a class changes from state 1->2 and disconnects from
83          * its parent's feed, we lose the ptr value and start from the
84          * first child again. Here we store the classid of the
85          * last valid ptr (used when ptr is NULL).
86          */
87         u32             last_ptr_id;
88 };
89
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95         struct Qdisc_class_common common;
96         struct psched_ratecfg   rate;
97         struct psched_ratecfg   ceil;
98         s64                     buffer, cbuffer;/* token bucket depth/rate */
99         s64                     mbuffer;        /* max wait time */
100         u32                     prio;           /* these two are used only by leaves... */
101         int                     quantum;        /* but stored for parent-to-leaf return */
102
103         struct tcf_proto __rcu  *filter_list;   /* class attached filters */
104         struct tcf_block        *block;
105
106         int                     level;          /* our level (see above) */
107         unsigned int            children;
108         struct htb_class        *parent;        /* parent class */
109
110         struct net_rate_estimator __rcu *rate_est;
111
112         /*
113          * Written often fields
114          */
115         struct gnet_stats_basic_sync bstats;
116         struct gnet_stats_basic_sync bstats_bias;
117         struct tc_htb_xstats    xstats; /* our special stats */
118
119         /* token bucket parameters */
120         s64                     tokens, ctokens;/* current number of tokens */
121         s64                     t_c;            /* checkpoint time */
122
123         union {
124                 struct htb_class_leaf {
125                         int             deficit[TC_HTB_MAXDEPTH];
126                         struct Qdisc    *q;
127                         struct netdev_queue *offload_queue;
128                 } leaf;
129                 struct htb_class_inner {
130                         struct htb_prio clprio[TC_HTB_NUMPRIO];
131                 } inner;
132         };
133         s64                     pq_key;
134
135         int                     prio_activity;  /* for which prios are we active */
136         enum htb_cmode          cmode;          /* current mode of the class */
137         struct rb_node          pq_node;        /* node for event queue */
138         struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
139
140         unsigned int drops ____cacheline_aligned_in_smp;
141         unsigned int            overlimits;
142 };
143
144 struct htb_level {
145         struct rb_root  wait_pq;
146         struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148
149 struct htb_sched {
150         struct Qdisc_class_hash clhash;
151         int                     defcls;         /* class where unclassified flows go to */
152         int                     rate2quantum;   /* quant = rate / rate2quantum */
153
154         /* filters for qdisc itself */
155         struct tcf_proto __rcu  *filter_list;
156         struct tcf_block        *block;
157
158 #define HTB_WARN_TOOMANYEVENTS  0x1
159         unsigned int            warned; /* only one warning */
160         int                     direct_qlen;
161         struct work_struct      work;
162
163         /* non shaped skbs; let them go directly thru */
164         struct qdisc_skb_head   direct_queue;
165         u32                     direct_pkts;
166         u32                     overlimits;
167
168         struct qdisc_watchdog   watchdog;
169
170         s64                     now;    /* cached dequeue time */
171
172         /* time of nearest event per level (row) */
173         s64                     near_ev_cache[TC_HTB_MAXDEPTH];
174
175         int                     row_mask[TC_HTB_MAXDEPTH];
176
177         struct htb_level        hlevel[TC_HTB_MAXDEPTH];
178
179         struct Qdisc            **direct_qdiscs;
180         unsigned int            num_direct_qdiscs;
181
182         bool                    offload;
183 };
184
185 /* find class in global hash table using given handle */
186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188         struct htb_sched *q = qdisc_priv(sch);
189         struct Qdisc_class_common *clc;
190
191         clc = qdisc_class_find(&q->clhash, handle);
192         if (clc == NULL)
193                 return NULL;
194         return container_of(clc, struct htb_class, common);
195 }
196
197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199         return (unsigned long)htb_find(handle, sch);
200 }
201
202 #define HTB_DIRECT ((struct htb_class *)-1L)
203
204 /**
205  * htb_classify - classify a packet into class
206  * @skb: the socket buffer
207  * @sch: the active queue discipline
208  * @qerr: pointer for returned status code
209  *
210  * It returns NULL if the packet should be dropped or -1 if the packet
211  * should be passed directly thru. In all other cases a leaf class is returned.
212  * We allow direct class selection by classid in skb->priority. Then we examine
213  * filters in qdisc and in inner nodes (if higher filter points to the inner
214  * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
215  * internal fifo (direct). These packets then go directly thru. If we still
216  * have no valid leaf we try to use the MAJOR:default leaf. If still unsuccessful
217  * we finish and return the direct queue.
218  */
219 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
220                                       int *qerr)
221 {
222         struct htb_sched *q = qdisc_priv(sch);
223         struct htb_class *cl;
224         struct tcf_result res;
225         struct tcf_proto *tcf;
226         int result;
227
228         /* allow to select class by setting skb->priority to valid classid;
229          * note that nfmark can be used too by attaching filter fw with no
230          * rules in it
231          */
232         if (skb->priority == sch->handle)
233                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
234         cl = htb_find(skb->priority, sch);
235         if (cl) {
236                 if (cl->level == 0)
237                         return cl;
238                 /* Start with inner filter chain if a non-leaf class is selected */
239                 tcf = rcu_dereference_bh(cl->filter_list);
240         } else {
241                 tcf = rcu_dereference_bh(q->filter_list);
242         }
243
244         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
245         while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
246 #ifdef CONFIG_NET_CLS_ACT
247                 switch (result) {
248                 case TC_ACT_QUEUED:
249                 case TC_ACT_STOLEN:
250                 case TC_ACT_TRAP:
251                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
252                         fallthrough;
253                 case TC_ACT_SHOT:
254                         return NULL;
255                 }
256 #endif
257                 cl = (void *)res.class;
258                 if (!cl) {
259                         if (res.classid == sch->handle)
260                                 return HTB_DIRECT;      /* X:0 (direct flow) */
261                         cl = htb_find(res.classid, sch);
262                         if (!cl)
263                                 break;  /* filter selected invalid classid */
264                 }
265                 if (!cl->level)
266                         return cl;      /* we hit leaf; return it */
267
268                 /* we have got inner class; apply inner filter chain */
269                 tcf = rcu_dereference_bh(cl->filter_list);
270         }
271         /* classification failed; try to use default class */
272         cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
273         if (!cl || cl->level)
274                 return HTB_DIRECT;      /* bad default .. this is safe bet */
275         return cl;
276 }
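
/* Classification precedence, summarized from the routine above (illustrative
 * recap, not an exhaustive description):
 *   1. skb->priority == sch->handle                -> HTB_DIRECT
 *   2. skb->priority names an existing leaf class  -> that leaf
 *   3. tcf filters (qdisc chain, then the chains of
 *      any selected inner classes)                 -> leaf, or MAJOR:0 -> HTB_DIRECT
 *   4. MAJOR:q->defcls, if it is a leaf            -> default leaf
 *   5. anything else                               -> HTB_DIRECT
 */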
277
278 /**
279  * htb_add_to_id_tree - adds class to the round robin list
280  * @root: the root of the tree
281  * @cl: the class to add
282  * @prio: the given prio in class
283  *
284  * Routine adds the class to the list (actually a tree) sorted by classid.
285  * Make sure that the class is not already on such a list for the given prio.
286  */
287 static void htb_add_to_id_tree(struct rb_root *root,
288                                struct htb_class *cl, int prio)
289 {
290         struct rb_node **p = &root->rb_node, *parent = NULL;
291
292         while (*p) {
293                 struct htb_class *c;
294                 parent = *p;
295                 c = rb_entry(parent, struct htb_class, node[prio]);
296
297                 if (cl->common.classid > c->common.classid)
298                         p = &parent->rb_right;
299                 else
300                         p = &parent->rb_left;
301         }
302         rb_link_node(&cl->node[prio], parent, p);
303         rb_insert_color(&cl->node[prio], root);
304 }
305
306 /**
307  * htb_add_to_wait_tree - adds class to the event queue with delay
308  * @q: the priority event queue
309  * @cl: the class to add
310  * @delay: delay in nanoseconds
311  *
312  * The class is added to the priority event queue to indicate that the class
313  * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
314  * the class is not already in the queue.
315  */
316 static void htb_add_to_wait_tree(struct htb_sched *q,
317                                  struct htb_class *cl, s64 delay)
318 {
319         struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
320
321         cl->pq_key = q->now + delay;
322         if (cl->pq_key == q->now)
323                 cl->pq_key++;
324
325         /* update the nearest event cache */
326         if (q->near_ev_cache[cl->level] > cl->pq_key)
327                 q->near_ev_cache[cl->level] = cl->pq_key;
328
329         while (*p) {
330                 struct htb_class *c;
331                 parent = *p;
332                 c = rb_entry(parent, struct htb_class, pq_node);
333                 if (cl->pq_key >= c->pq_key)
334                         p = &parent->rb_right;
335                 else
336                         p = &parent->rb_left;
337         }
338         rb_link_node(&cl->pq_node, parent, p);
339         rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
340 }
341
342 /**
343  * htb_next_rb_node - finds next node in binary tree
344  * @n: the current node in binary tree
345  *
346  * When we are past last key we return NULL.
347  * Average complexity is 2 steps per call.
348  */
349 static inline void htb_next_rb_node(struct rb_node **n)
350 {
351         *n = rb_next(*n);
352 }
353
354 /**
355  * htb_add_class_to_row - add class to its row
356  * @q: the priority event queue
357  * @cl: the class to add
358  * @mask: bitmap of the given priorities in the class
359  *
360  * The class is added to row at priorities marked in mask.
361  * It does nothing if mask == 0.
362  */
363 static inline void htb_add_class_to_row(struct htb_sched *q,
364                                         struct htb_class *cl, int mask)
365 {
366         q->row_mask[cl->level] |= mask;
367         while (mask) {
368                 int prio = ffz(~mask);
369                 mask &= ~(1 << prio);
370                 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
371         }
372 }
373
374 /* If this triggers, it is a bug in this code, but it need not be fatal */
375 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
376 {
377         if (RB_EMPTY_NODE(rb)) {
378                 WARN_ON(1);
379         } else {
380                 rb_erase(rb, root);
381                 RB_CLEAR_NODE(rb);
382         }
383 }
384
385
386 /**
387  * htb_remove_class_from_row - removes class from its row
388  * @q: the priority event queue
389  * @cl: the class to remove
390  * @mask: bitmap of the given priorities in the class
391  *
392  * The class is removed from row at priorities marked in mask.
393  * It does nothing if mask == 0.
394  */
395 static inline void htb_remove_class_from_row(struct htb_sched *q,
396                                                  struct htb_class *cl, int mask)
397 {
398         int m = 0;
399         struct htb_level *hlevel = &q->hlevel[cl->level];
400
401         while (mask) {
402                 int prio = ffz(~mask);
403                 struct htb_prio *hprio = &hlevel->hprio[prio];
404
405                 mask &= ~(1 << prio);
406                 if (hprio->ptr == cl->node + prio)
407                         htb_next_rb_node(&hprio->ptr);
408
409                 htb_safe_rb_erase(cl->node + prio, &hprio->row);
410                 if (!hprio->row.rb_node)
411                         m |= 1 << prio;
412         }
413         q->row_mask[cl->level] &= ~m;
414 }
415
416 /**
417  * htb_activate_prios - creates active class's feed chain
418  * @q: the priority event queue
419  * @cl: the class to activate
420  *
421  * The class is connected to its ancestors and/or the appropriate rows
422  * for the priorities it participates in. cl->cmode must be the new
423  * (activated) mode. It does nothing if cl->prio_activity == 0.
424  */
425 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
426 {
427         struct htb_class *p = cl->parent;
428         long m, mask = cl->prio_activity;
429
430         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
431                 m = mask;
432                 while (m) {
433                         unsigned int prio = ffz(~m);
434
435                         if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
436                                 break;
437                         m &= ~(1 << prio);
438
439                         if (p->inner.clprio[prio].feed.rb_node)
440                                 /* parent already has its feed in use, so
441                                  * reset the bit in mask as the parent is already ok
442                                  */
443                                 mask &= ~(1 << prio);
444
445                         htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
446                 }
447                 p->prio_activity |= mask;
448                 cl = p;
449                 p = cl->parent;
450
451         }
452         if (cl->cmode == HTB_CAN_SEND && mask)
453                 htb_add_class_to_row(q, cl, mask);
454 }
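
/* Example walk-through of the above (illustrative): a leaf active only on
 * prio 0 (mask 0x1) in HTB_MAY_BORROW mode is linked into its parent's
 * inner.clprio[0].feed.  The bit is cleared from the mask whenever the
 * parent's feed was already non-empty, so already-active ancestors are not
 * marked again.  Once an ancestor in HTB_CAN_SEND mode is reached, whatever
 * is left in the mask is added to that level's row via
 * htb_add_class_to_row().
 */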
455
456 /**
457  * htb_deactivate_prios - remove class from feed chain
458  * @q: the priority event queue
459  * @cl: the class to deactivate
460  *
461  * cl->cmode must represent the old mode (before deactivation). It does
462  * nothing if cl->prio_activity == 0. The class is removed from all feed
463  * chains and rows.
464  */
465 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
466 {
467         struct htb_class *p = cl->parent;
468         long m, mask = cl->prio_activity;
469
470         while (cl->cmode == HTB_MAY_BORROW && p && mask) {
471                 m = mask;
472                 mask = 0;
473                 while (m) {
474                         int prio = ffz(~m);
475                         m &= ~(1 << prio);
476
477                         if (p->inner.clprio[prio].ptr == cl->node + prio) {
478                                 /* we are removing a child which is pointed to
479                                  * from the parent feed - forget the pointer but
480                                  * remember the classid
481                                  */
482                                 p->inner.clprio[prio].last_ptr_id = cl->common.classid;
483                                 p->inner.clprio[prio].ptr = NULL;
484                         }
485
486                         htb_safe_rb_erase(cl->node + prio,
487                                           &p->inner.clprio[prio].feed);
488
489                         if (!p->inner.clprio[prio].feed.rb_node)
490                                 mask |= 1 << prio;
491                 }
492
493                 p->prio_activity &= ~mask;
494                 cl = p;
495                 p = cl->parent;
496
497         }
498         if (cl->cmode == HTB_CAN_SEND && mask)
499                 htb_remove_class_from_row(q, cl, mask);
500 }
501
502 static inline s64 htb_lowater(const struct htb_class *cl)
503 {
504         if (htb_hysteresis)
505                 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
506         else
507                 return 0;
508 }
509 static inline s64 htb_hiwater(const struct htb_class *cl)
510 {
511         if (htb_hysteresis)
512                 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
513         else
514                 return 0;
515 }
516
517
518 /**
519  * htb_class_mode - computes and returns current class mode
520  * @cl: the target class
521  * @diff: diff time in nanoseconds
522  *
523  * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
524  * is not HTB_CAN_SEND then *diff is updated to the time difference
525  * from now to the time when cl will change its state.
526  * Also it is worth noting that the class mode doesn't change simply
527  * at cl->{c,}tokens == 0; rather there can be hysteresis over the
528  * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
529  * mode transitions per time unit. The speed gain is about 1/6.
530  */
531 static inline enum htb_cmode
532 htb_class_mode(struct htb_class *cl, s64 *diff)
533 {
534         s64 toks;
535
536         if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
537                 *diff = -toks;
538                 return HTB_CANT_SEND;
539         }
540
541         if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
542                 return HTB_CAN_SEND;
543
544         *diff = -toks;
545         return HTB_MAY_BORROW;
546 }
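
/* Worked example for the above (illustrative numbers, hysteresis disabled so
 * both watermarks are 0): if cl->ctokens + *diff == -5000, the class cannot
 * send at all, the mode is HTB_CANT_SEND and *diff becomes 5000 (the wait
 * until ctokens would reach 0 again).  If the ceil bucket is fine but
 * cl->tokens + *diff == -2000, the class may only borrow: the mode is
 * HTB_MAY_BORROW and *diff becomes 2000.
 */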
547
548 /**
549  * htb_change_class_mode - changes class's mode
550  * @q: the priority event queue
551  * @cl: the target class
552  * @diff: diff time in nanoseconds
553  *
554  * This should be the only way to change a class's mode under normal
555  * circumstances. The routine will update the feed list linkage, change the
556  * mode and add the class to the wait event queue if appropriate. The new
557  * mode should differ from the old one, and cl->pq_key has to be valid if
558  * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
559  */
560 static void
561 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
562 {
563         enum htb_cmode new_mode = htb_class_mode(cl, diff);
564
565         if (new_mode == cl->cmode)
566                 return;
567
568         if (new_mode == HTB_CANT_SEND) {
569                 cl->overlimits++;
570                 q->overlimits++;
571         }
572
573         if (cl->prio_activity) {        /* not necessary: speed optimization */
574                 if (cl->cmode != HTB_CANT_SEND)
575                         htb_deactivate_prios(q, cl);
576                 cl->cmode = new_mode;
577                 if (new_mode != HTB_CANT_SEND)
578                         htb_activate_prios(q, cl);
579         } else
580                 cl->cmode = new_mode;
581 }
582
583 /**
584  * htb_activate - inserts leaf cl into appropriate active feeds
585  * @q: the priority event queue
586  * @cl: the target class
587  *
588  * Routine learns the (new) priority of the leaf and activates the feed
589  * chain for that prio. It can safely be called on an already active leaf.
591  */
592 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
593 {
594         WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
595
596         if (!cl->prio_activity) {
597                 cl->prio_activity = 1 << cl->prio;
598                 htb_activate_prios(q, cl);
599         }
600 }
601
602 /**
603  * htb_deactivate - remove leaf cl from active feeds
604  * @q: the priority event queue
605  * @cl: the target class
606  *
607  * Make sure that the leaf is active. In other words it can't be called
608  * with a non-active leaf.
609  */
610 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
611 {
612         WARN_ON(!cl->prio_activity);
613
614         htb_deactivate_prios(q, cl);
615         cl->prio_activity = 0;
616 }
617
618 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
619                        struct sk_buff **to_free)
620 {
621         int ret;
622         unsigned int len = qdisc_pkt_len(skb);
623         struct htb_sched *q = qdisc_priv(sch);
624         struct htb_class *cl = htb_classify(skb, sch, &ret);
625
626         if (cl == HTB_DIRECT) {
627                 /* enqueue to helper queue */
628                 if (q->direct_queue.qlen < q->direct_qlen) {
629                         __qdisc_enqueue_tail(skb, &q->direct_queue);
630                         q->direct_pkts++;
631                 } else {
632                         return qdisc_drop(skb, sch, to_free);
633                 }
634 #ifdef CONFIG_NET_CLS_ACT
635         } else if (!cl) {
636                 if (ret & __NET_XMIT_BYPASS)
637                         qdisc_qstats_drop(sch);
638                 __qdisc_drop(skb, to_free);
639                 return ret;
640 #endif
641         } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
642                                         to_free)) != NET_XMIT_SUCCESS) {
643                 if (net_xmit_drop_count(ret)) {
644                         qdisc_qstats_drop(sch);
645                         cl->drops++;
646                 }
647                 return ret;
648         } else {
649                 htb_activate(q, cl);
650         }
651
652         sch->qstats.backlog += len;
653         sch->q.qlen++;
654         return NET_XMIT_SUCCESS;
655 }
656
657 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
658 {
659         s64 toks = diff + cl->tokens;
660
661         if (toks > cl->buffer)
662                 toks = cl->buffer;
663         toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
664         if (toks <= -cl->mbuffer)
665                 toks = 1 - cl->mbuffer;
666
667         cl->tokens = toks;
668 }
669
670 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
671 {
672         s64 toks = diff + cl->ctokens;
673
674         if (toks > cl->cbuffer)
675                 toks = cl->cbuffer;
676         toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
677         if (toks <= -cl->mbuffer)
678                 toks = 1 - cl->mbuffer;
679
680         cl->ctokens = toks;
681 }
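
/* Worked example for the two helpers above (illustrative numbers): with
 * cl->tokens == 10000, diff == 2000 and cl->buffer == 11000, toks is first
 * capped at 11000; if psched_l2t_ns() prices the packet at 3000 ns worth of
 * tokens, cl->tokens ends up at 8000.  The lower clamp to 1 - cl->mbuffer
 * keeps a heavily overdrawn class from accumulating an unbounded debt.
 */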
682
683 /**
684  * htb_charge_class - charges amount "bytes" to leaf and ancestors
685  * @q: the priority event queue
686  * @cl: the class to start iterating from
687  * @level: the minimum level to account
688  * @skb: the socket buffer
689  *
690  * Routine assumes that a packet "bytes" long was dequeued from leaf cl
691  * borrowing from "level". It accounts bytes to the ceil leaky bucket for
692  * the leaf and all ancestors and to the rate bucket for ancestors at levels
693  * "level" and higher. It also handles a possible change of mode resulting
694  * from the update. Note that the mode can also increase here (MAY_BORROW to
695  * CAN_SEND) because we can use a more precise clock than the event queue here.
696  * In such a case we remove the class from the event queue first.
697  */
698 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
699                              int level, struct sk_buff *skb)
700 {
701         int bytes = qdisc_pkt_len(skb);
702         enum htb_cmode old_mode;
703         s64 diff;
704
705         while (cl) {
706                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
707                 if (cl->level >= level) {
708                         if (cl->level == level)
709                                 cl->xstats.lends++;
710                         htb_accnt_tokens(cl, bytes, diff);
711                 } else {
712                         cl->xstats.borrows++;
713                         cl->tokens += diff;     /* we moved t_c; update tokens */
714                 }
715                 htb_accnt_ctokens(cl, bytes, diff);
716                 cl->t_c = q->now;
717
718                 old_mode = cl->cmode;
719                 diff = 0;
720                 htb_change_class_mode(q, cl, &diff);
721                 if (old_mode != cl->cmode) {
722                         if (old_mode != HTB_CAN_SEND)
723                                 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
724                         if (cl->cmode != HTB_CAN_SEND)
725                                 htb_add_to_wait_tree(q, cl, diff);
726                 }
727
728                 /* update basic stats except for leaves which are already updated */
729                 if (cl->level)
730                         bstats_update(&cl->bstats, skb);
731
732                 cl = cl->parent;
733         }
734 }
735
736 /**
737  * htb_do_events - make mode changes to classes at the level
738  * @q: the priority event queue
739  * @level: which wait_pq in 'q->hlevel'
740  * @start: start jiffies
741  *
742  * Scans the event queue for pending events and applies them. Returns the time
743  * of the next pending event (0 for no event in pq, q->now for too many events).
744  * Note: Only events with cl->pq_key <= q->now are applied.
745  */
746 static s64 htb_do_events(struct htb_sched *q, const int level,
747                          unsigned long start)
748 {
749         /* don't run for longer than 2 jiffies; 2 is used instead of
750          * 1 to simplify things when jiffy is going to be incremented
751          * too soon
752          */
753         unsigned long stop_at = start + 2;
754         struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
755
756         while (time_before(jiffies, stop_at)) {
757                 struct htb_class *cl;
758                 s64 diff;
759                 struct rb_node *p = rb_first(wait_pq);
760
761                 if (!p)
762                         return 0;
763
764                 cl = rb_entry(p, struct htb_class, pq_node);
765                 if (cl->pq_key > q->now)
766                         return cl->pq_key;
767
768                 htb_safe_rb_erase(p, wait_pq);
769                 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
770                 htb_change_class_mode(q, cl, &diff);
771                 if (cl->cmode != HTB_CAN_SEND)
772                         htb_add_to_wait_tree(q, cl, diff);
773         }
774
775         /* too much load - let's continue after a break for scheduling */
776         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
777                 pr_warn("htb: too many events!\n");
778                 q->warned |= HTB_WARN_TOOMANYEVENTS;
779         }
780
781         return q->now;
782 }
783
784 /* Returns class->node+prio from the id-tree where the class's id is >= id,
785  * or NULL if no such one exists.
786  */
787 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
788                                               u32 id)
789 {
790         struct rb_node *r = NULL;
791         while (n) {
792                 struct htb_class *cl =
793                     rb_entry(n, struct htb_class, node[prio]);
794
795                 if (id > cl->common.classid) {
796                         n = n->rb_right;
797                 } else if (id < cl->common.classid) {
798                         r = n;
799                         n = n->rb_left;
800                 } else {
801                         return n;
802                 }
803         }
804         return r;
805 }
806
807 /**
808  * htb_lookup_leaf - returns next leaf class in DRR order
809  * @hprio: the current one
810  * @prio: which prio in class
811  *
812  * Find the leaf which the current feed pointer points to.
813  */
814 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
815 {
816         int i;
817         struct {
818                 struct rb_node *root;
819                 struct rb_node **pptr;
820                 u32 *pid;
821         } stk[TC_HTB_MAXDEPTH], *sp = stk;
822
823         BUG_ON(!hprio->row.rb_node);
824         sp->root = hprio->row.rb_node;
825         sp->pptr = &hprio->ptr;
826         sp->pid = &hprio->last_ptr_id;
827
828         for (i = 0; i < 65535; i++) {
829                 if (!*sp->pptr && *sp->pid) {
830                         /* ptr was invalidated but id is valid - try to recover
831                          * the original or next ptr
832                          */
833                         *sp->pptr =
834                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
835                 }
836                 *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
837                                  * can become out of date quickly
838                                  */
839                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
840                         *sp->pptr = sp->root;
841                         while ((*sp->pptr)->rb_left)
842                                 *sp->pptr = (*sp->pptr)->rb_left;
843                         if (sp > stk) {
844                                 sp--;
845                                 if (!*sp->pptr) {
846                                         WARN_ON(1);
847                                         return NULL;
848                                 }
849                                 htb_next_rb_node(sp->pptr);
850                         }
851                 } else {
852                         struct htb_class *cl;
853                         struct htb_prio *clp;
854
855                         cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
856                         if (!cl->level)
857                                 return cl;
858                         clp = &cl->inner.clprio[prio];
859                         (++sp)->root = clp->feed.rb_node;
860                         sp->pptr = &clp->ptr;
861                         sp->pid = &clp->last_ptr_id;
862                 }
863         }
864         WARN_ON(1);
865         return NULL;
866 }
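
/* Sketch of the walk above (illustrative): the stack mirrors the path from
 * the row down through the nested per-prio feed trees.  At each level the
 * cached ptr (or, if it was invalidated, the classid hint in last_ptr_id)
 * picks the next candidate; inner classes push a new stack frame for their
 * feed, and the first class reached with level 0 is returned as the next
 * leaf in DRR order.
 */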
867
868 /* dequeues a packet at the given priority and level; call only if
869  * you are sure that there is an active class at prio/level
870  */
871 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
872                                         const int level)
873 {
874         struct sk_buff *skb = NULL;
875         struct htb_class *cl, *start;
876         struct htb_level *hlevel = &q->hlevel[level];
877         struct htb_prio *hprio = &hlevel->hprio[prio];
878
879         /* look initial class up in the row */
880         start = cl = htb_lookup_leaf(hprio, prio);
881
882         do {
883 next:
884                 if (unlikely(!cl))
885                         return NULL;
886
887                 /* class can be empty - it is unlikely but can be true if leaf
888                  * qdisc drops packets in enqueue routine or if someone used
889                  * graft operation on the leaf since last dequeue;
890                  * simply deactivate and skip such class
891                  */
892                 if (unlikely(cl->leaf.q->q.qlen == 0)) {
893                         struct htb_class *next;
894                         htb_deactivate(q, cl);
895
896                         /* row/level might become empty */
897                         if ((q->row_mask[level] & (1 << prio)) == 0)
898                                 return NULL;
899
900                         next = htb_lookup_leaf(hprio, prio);
901
902                         if (cl == start)        /* fix start if we just deleted it */
903                                 start = next;
904                         cl = next;
905                         goto next;
906                 }
907
908                 skb = cl->leaf.q->dequeue(cl->leaf.q);
909                 if (likely(skb != NULL))
910                         break;
911
912                 qdisc_warn_nonwc("htb", cl->leaf.q);
913                 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
914                                          &q->hlevel[0].hprio[prio].ptr);
915                 cl = htb_lookup_leaf(hprio, prio);
916
917         } while (cl != start);
918
919         if (likely(skb != NULL)) {
920                 bstats_update(&cl->bstats, skb);
921                 cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
922                 if (cl->leaf.deficit[level] < 0) {
923                         cl->leaf.deficit[level] += cl->quantum;
924                         htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
925                                                  &q->hlevel[0].hprio[prio].ptr);
926                 }
927                 /* this used to be after charge_class but this constellation
928                  * gives us slightly better performance
929                  */
930                 if (!cl->leaf.q->q.qlen)
931                         htb_deactivate(q, cl);
932                 htb_charge_class(q, cl, level, skb);
933         }
934         return skb;
935 }
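
/* DRR accounting example for the above (illustrative numbers): a leaf with
 * quantum 1500 and deficit[level] == 200 that dequeues a 700-byte packet ends
 * up with a deficit of -500; since that is negative, the quantum is added
 * back (deficit becomes 1000) and the round-robin pointer for this prio/level
 * is advanced so the next class gets a turn.
 */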
936
937 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
938 {
939         struct sk_buff *skb;
940         struct htb_sched *q = qdisc_priv(sch);
941         int level;
942         s64 next_event;
943         unsigned long start_at;
944
945         /* try to dequeue direct packets as high prio (!) to minimize cpu work */
946         skb = __qdisc_dequeue_head(&q->direct_queue);
947         if (skb != NULL) {
948 ok:
949                 qdisc_bstats_update(sch, skb);
950                 qdisc_qstats_backlog_dec(sch, skb);
951                 sch->q.qlen--;
952                 return skb;
953         }
954
955         if (!sch->q.qlen)
956                 goto fin;
957         q->now = ktime_get_ns();
958         start_at = jiffies;
959
960         next_event = q->now + 5LLU * NSEC_PER_SEC;
961
962         for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
963                 /* common case optimization - skip event handler quickly */
964                 int m;
965                 s64 event = q->near_ev_cache[level];
966
967                 if (q->now >= event) {
968                         event = htb_do_events(q, level, start_at);
969                         if (!event)
970                                 event = q->now + NSEC_PER_SEC;
971                         q->near_ev_cache[level] = event;
972                 }
973
974                 if (next_event > event)
975                         next_event = event;
976
977                 m = ~q->row_mask[level];
978                 while (m != (int)(-1)) {
979                         int prio = ffz(m);
980
981                         m |= 1 << prio;
982                         skb = htb_dequeue_tree(q, prio, level);
983                         if (likely(skb != NULL))
984                                 goto ok;
985                 }
986         }
987         if (likely(next_event > q->now))
988                 qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
989         else
990                 schedule_work(&q->work);
991 fin:
992         return skb;
993 }
994
995 /* reset all classes */
996 /* always called under BH & queue lock */
997 static void htb_reset(struct Qdisc *sch)
998 {
999         struct htb_sched *q = qdisc_priv(sch);
1000         struct htb_class *cl;
1001         unsigned int i;
1002
1003         for (i = 0; i < q->clhash.hashsize; i++) {
1004                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1005                         if (cl->level)
1006                                 memset(&cl->inner, 0, sizeof(cl->inner));
1007                         else {
1008                                 if (cl->leaf.q && !q->offload)
1009                                         qdisc_reset(cl->leaf.q);
1010                         }
1011                         cl->prio_activity = 0;
1012                         cl->cmode = HTB_CAN_SEND;
1013                 }
1014         }
1015         qdisc_watchdog_cancel(&q->watchdog);
1016         __qdisc_reset_queue(&q->direct_queue);
1017         memset(q->hlevel, 0, sizeof(q->hlevel));
1018         memset(q->row_mask, 0, sizeof(q->row_mask));
1019 }
1020
1021 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1022         [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1023         [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1024         [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1025         [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026         [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1027         [TCA_HTB_RATE64] = { .type = NLA_U64 },
1028         [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1029         [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1030 };
1031
1032 static void htb_work_func(struct work_struct *work)
1033 {
1034         struct htb_sched *q = container_of(work, struct htb_sched, work);
1035         struct Qdisc *sch = q->watchdog.qdisc;
1036
1037         rcu_read_lock();
1038         __netif_schedule(qdisc_root(sch));
1039         rcu_read_unlock();
1040 }
1041
1042 static void htb_set_lockdep_class_child(struct Qdisc *q)
1043 {
1044         static struct lock_class_key child_key;
1045
1046         lockdep_set_class(qdisc_lock(q), &child_key);
1047 }
1048
1049 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1050 {
1051         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1052 }
1053
1054 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1055                     struct netlink_ext_ack *extack)
1056 {
1057         struct net_device *dev = qdisc_dev(sch);
1058         struct tc_htb_qopt_offload offload_opt;
1059         struct htb_sched *q = qdisc_priv(sch);
1060         struct nlattr *tb[TCA_HTB_MAX + 1];
1061         struct tc_htb_glob *gopt;
1062         unsigned int ntx;
1063         bool offload;
1064         int err;
1065
1066         qdisc_watchdog_init(&q->watchdog, sch);
1067         INIT_WORK(&q->work, htb_work_func);
1068
1069         if (!opt)
1070                 return -EINVAL;
1071
1072         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1073         if (err)
1074                 return err;
1075
1076         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1077                                           NULL);
1078         if (err < 0)
1079                 return err;
1080
1081         if (!tb[TCA_HTB_INIT])
1082                 return -EINVAL;
1083
1084         gopt = nla_data(tb[TCA_HTB_INIT]);
1085         if (gopt->version != HTB_VER >> 16)
1086                 return -EINVAL;
1087
1088         offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1089
1090         if (offload) {
1091                 if (sch->parent != TC_H_ROOT) {
1092                         NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1093                         return -EOPNOTSUPP;
1094                 }
1095
1096                 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1097                         NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1098                         return -EOPNOTSUPP;
1099                 }
1100
1101                 q->num_direct_qdiscs = dev->real_num_tx_queues;
1102                 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1103                                            sizeof(*q->direct_qdiscs),
1104                                            GFP_KERNEL);
1105                 if (!q->direct_qdiscs)
1106                         return -ENOMEM;
1107         }
1108
1109         err = qdisc_class_hash_init(&q->clhash);
1110         if (err < 0)
1111                 return err;
1112
1113         if (tb[TCA_HTB_DIRECT_QLEN])
1114                 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1115         else
1116                 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1117
1118         if ((q->rate2quantum = gopt->rate2quantum) < 1)
1119                 q->rate2quantum = 1;
1120         q->defcls = gopt->defcls;
1121
1122         if (!offload)
1123                 return 0;
1124
1125         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1126                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1127                 struct Qdisc *qdisc;
1128
1129                 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1130                                           TC_H_MAKE(sch->handle, 0), extack);
1131                 if (!qdisc) {
1132                         return -ENOMEM;
1133                 }
1134
1135                 htb_set_lockdep_class_child(qdisc);
1136                 q->direct_qdiscs[ntx] = qdisc;
1137                 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1138         }
1139
1140         sch->flags |= TCQ_F_MQROOT;
1141
1142         offload_opt = (struct tc_htb_qopt_offload) {
1143                 .command = TC_HTB_CREATE,
1144                 .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1145                 .classid = TC_H_MIN(q->defcls),
1146                 .extack = extack,
1147         };
1148         err = htb_offload(dev, &offload_opt);
1149         if (err)
1150                 return err;
1151
1152         /* Defer this assignment, so that htb_destroy skips offload-related
1153          * parts (especially calling ndo_setup_tc) on errors.
1154          */
1155         q->offload = true;
1156
1157         return 0;
1158 }
1159
1160 static void htb_attach_offload(struct Qdisc *sch)
1161 {
1162         struct net_device *dev = qdisc_dev(sch);
1163         struct htb_sched *q = qdisc_priv(sch);
1164         unsigned int ntx;
1165
1166         for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1167                 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1168
1169                 old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1170                 qdisc_put(old);
1171                 qdisc_hash_add(qdisc, false);
1172         }
1173         for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1174                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1175                 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1176
1177                 qdisc_put(old);
1178         }
1179
1180         kfree(q->direct_qdiscs);
1181         q->direct_qdiscs = NULL;
1182 }
1183
1184 static void htb_attach_software(struct Qdisc *sch)
1185 {
1186         struct net_device *dev = qdisc_dev(sch);
1187         unsigned int ntx;
1188
1189         /* Resemble qdisc_graft behavior. */
1190         for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1191                 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1192                 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1193
1194                 qdisc_refcount_inc(sch);
1195
1196                 qdisc_put(old);
1197         }
1198 }
1199
1200 static void htb_attach(struct Qdisc *sch)
1201 {
1202         struct htb_sched *q = qdisc_priv(sch);
1203
1204         if (q->offload)
1205                 htb_attach_offload(sch);
1206         else
1207                 htb_attach_software(sch);
1208 }
1209
1210 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1211 {
1212         struct htb_sched *q = qdisc_priv(sch);
1213         struct nlattr *nest;
1214         struct tc_htb_glob gopt;
1215
1216         if (q->offload)
1217                 sch->flags |= TCQ_F_OFFLOADED;
1218         else
1219                 sch->flags &= ~TCQ_F_OFFLOADED;
1220
1221         sch->qstats.overlimits = q->overlimits;
1222         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1223          * no change can happen on the qdisc parameters.
1224          */
1225
1226         gopt.direct_pkts = q->direct_pkts;
1227         gopt.version = HTB_VER;
1228         gopt.rate2quantum = q->rate2quantum;
1229         gopt.defcls = q->defcls;
1230         gopt.debug = 0;
1231
1232         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1233         if (nest == NULL)
1234                 goto nla_put_failure;
1235         if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1236             nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1237                 goto nla_put_failure;
1238         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1239                 goto nla_put_failure;
1240
1241         return nla_nest_end(skb, nest);
1242
1243 nla_put_failure:
1244         nla_nest_cancel(skb, nest);
1245         return -1;
1246 }
1247
1248 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1249                           struct sk_buff *skb, struct tcmsg *tcm)
1250 {
1251         struct htb_class *cl = (struct htb_class *)arg;
1252         struct htb_sched *q = qdisc_priv(sch);
1253         struct nlattr *nest;
1254         struct tc_htb_opt opt;
1255
1256         /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1257          * no change can happen on the class parameters.
1258          */
1259         tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1260         tcm->tcm_handle = cl->common.classid;
1261         if (!cl->level && cl->leaf.q)
1262                 tcm->tcm_info = cl->leaf.q->handle;
1263
1264         nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1265         if (nest == NULL)
1266                 goto nla_put_failure;
1267
1268         memset(&opt, 0, sizeof(opt));
1269
1270         psched_ratecfg_getrate(&opt.rate, &cl->rate);
1271         opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1272         psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1273         opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1274         opt.quantum = cl->quantum;
1275         opt.prio = cl->prio;
1276         opt.level = cl->level;
1277         if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1278                 goto nla_put_failure;
1279         if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1280                 goto nla_put_failure;
1281         if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1282             nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1283                               TCA_HTB_PAD))
1284                 goto nla_put_failure;
1285         if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1286             nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1287                               TCA_HTB_PAD))
1288                 goto nla_put_failure;
1289
1290         return nla_nest_end(skb, nest);
1291
1292 nla_put_failure:
1293         nla_nest_cancel(skb, nest);
1294         return -1;
1295 }
1296
1297 static void htb_offload_aggregate_stats(struct htb_sched *q,
1298                                         struct htb_class *cl)
1299 {
1300         u64 bytes = 0, packets = 0;
1301         struct htb_class *c;
1302         unsigned int i;
1303
1304         gnet_stats_basic_sync_init(&cl->bstats);
1305
1306         for (i = 0; i < q->clhash.hashsize; i++) {
1307                 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1308                         struct htb_class *p = c;
1309
1310                         while (p && p->level < cl->level)
1311                                 p = p->parent;
1312
1313                         if (p != cl)
1314                                 continue;
1315
1316                         bytes += u64_stats_read(&c->bstats_bias.bytes);
1317                         packets += u64_stats_read(&c->bstats_bias.packets);
1318                         if (c->level == 0) {
1319                                 bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1320                                 packets += u64_stats_read(&c->leaf.q->bstats.packets);
1321                         }
1322                 }
1323         }
1324         _bstats_update(&cl->bstats, bytes, packets);
1325 }
1326
1327 static int
1328 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1329 {
1330         struct htb_class *cl = (struct htb_class *)arg;
1331         struct htb_sched *q = qdisc_priv(sch);
1332         struct gnet_stats_queue qs = {
1333                 .drops = cl->drops,
1334                 .overlimits = cl->overlimits,
1335         };
1336         __u32 qlen = 0;
1337
1338         if (!cl->level && cl->leaf.q)
1339                 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1340
1341         cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1342                                     INT_MIN, INT_MAX);
1343         cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1344                                      INT_MIN, INT_MAX);
1345
1346         if (q->offload) {
1347                 if (!cl->level) {
1348                         if (cl->leaf.q)
1349                                 cl->bstats = cl->leaf.q->bstats;
1350                         else
1351                                 gnet_stats_basic_sync_init(&cl->bstats);
1352                         _bstats_update(&cl->bstats,
1353                                        u64_stats_read(&cl->bstats_bias.bytes),
1354                                        u64_stats_read(&cl->bstats_bias.packets));
1355                 } else {
1356                         htb_offload_aggregate_stats(q, cl);
1357                 }
1358         }
1359
1360         if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1361             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1362             gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1363                 return -1;
1364
1365         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1366 }
1367
1368 static struct netdev_queue *
1369 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1370 {
1371         struct net_device *dev = qdisc_dev(sch);
1372         struct tc_htb_qopt_offload offload_opt;
1373         struct htb_sched *q = qdisc_priv(sch);
1374         int err;
1375
1376         if (!q->offload)
1377                 return sch->dev_queue;
1378
1379         offload_opt = (struct tc_htb_qopt_offload) {
1380                 .command = TC_HTB_LEAF_QUERY_QUEUE,
1381                 .classid = TC_H_MIN(tcm->tcm_parent),
1382         };
1383         err = htb_offload(dev, &offload_opt);
1384         if (err || offload_opt.qid >= dev->num_tx_queues)
1385                 return NULL;
1386         return netdev_get_tx_queue(dev, offload_opt.qid);
1387 }
1388
1389 static struct Qdisc *
1390 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1391 {
1392         struct net_device *dev = dev_queue->dev;
1393         struct Qdisc *old_q;
1394
1395         if (dev->flags & IFF_UP)
1396                 dev_deactivate(dev);
1397         old_q = dev_graft_qdisc(dev_queue, new_q);
1398         if (new_q)
1399                 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1400         if (dev->flags & IFF_UP)
1401                 dev_activate(dev);
1402
1403         return old_q;
1404 }
1405
1406 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1407 {
1408         struct netdev_queue *queue;
1409
1410         queue = cl->leaf.offload_queue;
1411         if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1412                 WARN_ON(cl->leaf.q->dev_queue != queue);
1413
1414         return queue;
1415 }
1416
1417 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1418                                    struct htb_class *cl_new, bool destroying)
1419 {
1420         struct netdev_queue *queue_old, *queue_new;
1421         struct net_device *dev = qdisc_dev(sch);
1422
1423         queue_old = htb_offload_get_queue(cl_old);
1424         queue_new = htb_offload_get_queue(cl_new);
1425
1426         if (!destroying) {
1427                 struct Qdisc *qdisc;
1428
1429                 if (dev->flags & IFF_UP)
1430                         dev_deactivate(dev);
1431                 qdisc = dev_graft_qdisc(queue_old, NULL);
1432                 WARN_ON(qdisc != cl_old->leaf.q);
1433         }
1434
1435         if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1436                 cl_old->leaf.q->dev_queue = queue_new;
1437         cl_old->leaf.offload_queue = queue_new;
1438
1439         if (!destroying) {
1440                 struct Qdisc *qdisc;
1441
1442                 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1443                 if (dev->flags & IFF_UP)
1444                         dev_activate(dev);
1445                 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1446         }
1447 }
1448
1449 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1450                      struct Qdisc **old, struct netlink_ext_ack *extack)
1451 {
1452         struct netdev_queue *dev_queue = sch->dev_queue;
1453         struct htb_class *cl = (struct htb_class *)arg;
1454         struct htb_sched *q = qdisc_priv(sch);
1455         struct Qdisc *old_q;
1456
1457         if (cl->level)
1458                 return -EINVAL;
1459
1460         if (q->offload)
1461                 dev_queue = htb_offload_get_queue(cl);
1462
1463         if (!new) {
1464                 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1465                                         cl->common.classid, extack);
1466                 if (!new)
1467                         return -ENOBUFS;
1468         }
1469
1470         if (q->offload) {
1471                 htb_set_lockdep_class_child(new);
1472                 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1473                 qdisc_refcount_inc(new);
1474                 old_q = htb_graft_helper(dev_queue, new);
1475         }
1476
1477         *old = qdisc_replace(sch, new, &cl->leaf.q);
1478
1479         if (q->offload) {
1480                 WARN_ON(old_q != *old);
1481                 qdisc_put(old_q);
1482         }
1483
1484         return 0;
1485 }
1486
1487 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1488 {
1489         struct htb_class *cl = (struct htb_class *)arg;
1490         return !cl->level ? cl->leaf.q : NULL;
1491 }
1492
1493 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1494 {
1495         struct htb_class *cl = (struct htb_class *)arg;
1496
1497         htb_deactivate(qdisc_priv(sch), cl);
1498 }
1499
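/* Returns 1 if @cl is its parent's only child, i.e. deleting it turns the
 * parent back into a leaf. htb_parent_to_leaf() below performs that
 * conversion, resetting the parent to HTB_CAN_SEND and attaching @new_q
 * (or noop_qdisc) as its leaf qdisc.
 */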
1500 static inline int htb_parent_last_child(struct htb_class *cl)
1501 {
1502         if (!cl->parent)
1503                 /* the root class */
1504                 return 0;
1505         if (cl->parent->children > 1)
1506                 /* not the last child */
1507                 return 0;
1508         return 1;
1509 }
1510
1511 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1512                                struct Qdisc *new_q)
1513 {
1514         struct htb_sched *q = qdisc_priv(sch);
1515         struct htb_class *parent = cl->parent;
1516
1517         WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1518
1519         if (parent->cmode != HTB_CAN_SEND)
1520                 htb_safe_rb_erase(&parent->pq_node,
1521                                   &q->hlevel[parent->level].wait_pq);
1522
1523         parent->level = 0;
1524         memset(&parent->inner, 0, sizeof(parent->inner));
1525         parent->leaf.q = new_q ? new_q : &noop_qdisc;
1526         parent->tokens = parent->buffer;
1527         parent->ctokens = parent->cbuffer;
1528         parent->t_c = ktime_get_ns();
1529         parent->cmode = HTB_CAN_SEND;
1530         if (q->offload)
1531                 parent->leaf.offload_queue = cl->leaf.offload_queue;
1532 }
1533
1534 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1535                                        struct netdev_queue *dev_queue,
1536                                        struct Qdisc *new_q)
1537 {
1538         struct Qdisc *old_q;
1539
1540         /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1541         if (new_q)
1542                 qdisc_refcount_inc(new_q);
1543         old_q = htb_graft_helper(dev_queue, new_q);
1544         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1545 }
1546
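/* Tear down the offloaded state of leaf class @cl: detach its qdisc from the
 * TX queue (unless the whole qdisc is being destroyed), fold its byte/packet
 * counters into the parent's bstats_bias, and ask the driver to delete the
 * leaf (TC_HTB_LEAF_DEL, TC_HTB_LEAF_DEL_LAST or TC_HTB_LEAF_DEL_LAST_FORCE).
 */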
1547 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1548                                      bool last_child, bool destroying,
1549                                      struct netlink_ext_ack *extack)
1550 {
1551         struct tc_htb_qopt_offload offload_opt;
1552         struct netdev_queue *dev_queue;
1553         struct Qdisc *q = cl->leaf.q;
1554         struct Qdisc *old;
1555         int err;
1556
1557         if (cl->level)
1558                 return -EINVAL;
1559
1560         WARN_ON(!q);
1561         dev_queue = htb_offload_get_queue(cl);
1562         /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1563          * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1564          * does not need to graft or qdisc_put the qdisc being destroyed.
1565          */
1566         if (!destroying) {
1567                 old = htb_graft_helper(dev_queue, NULL);
1568                 /* Last qdisc grafted should be the same as cl->leaf.q when
1569                  * calling htb_delete.
1570                  */
1571                 WARN_ON(old != q);
1572         }
1573
1574         if (cl->parent) {
1575                 _bstats_update(&cl->parent->bstats_bias,
1576                                u64_stats_read(&q->bstats.bytes),
1577                                u64_stats_read(&q->bstats.packets));
1578         }
1579
1580         offload_opt = (struct tc_htb_qopt_offload) {
1581                 .command = !last_child ? TC_HTB_LEAF_DEL :
1582                            destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1583                            TC_HTB_LEAF_DEL_LAST,
1584                 .classid = cl->common.classid,
1585                 .extack = extack,
1586         };
1587         err = htb_offload(qdisc_dev(sch), &offload_opt);
1588
1589         if (!destroying) {
1590                 if (!err)
1591                         qdisc_put(old);
1592                 else
1593                         htb_graft_helper(dev_queue, old);
1594         }
1595
1596         if (last_child)
1597                 return err;
1598
1599         if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1600                 u32 classid = TC_H_MAJ(sch->handle) |
1601                               TC_H_MIN(offload_opt.classid);
1602                 struct htb_class *moved_cl = htb_find(classid, sch);
1603
1604                 htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1605         }
1606
1607         return err;
1608 }
1609
1610 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1611 {
1612         if (!cl->level) {
1613                 WARN_ON(!cl->leaf.q);
1614                 qdisc_put(cl->leaf.q);
1615         }
1616         gen_kill_estimator(&cl->rate_est);
1617         tcf_block_put(cl->block);
1618         kfree(cl);
1619 }
1620
1621 static void htb_destroy(struct Qdisc *sch)
1622 {
1623         struct net_device *dev = qdisc_dev(sch);
1624         struct tc_htb_qopt_offload offload_opt;
1625         struct htb_sched *q = qdisc_priv(sch);
1626         struct hlist_node *next;
1627         bool nonempty, changed;
1628         struct htb_class *cl;
1629         unsigned int i;
1630
1631         cancel_work_sync(&q->work);
1632         qdisc_watchdog_cancel(&q->watchdog);
1633         /* This line used to be after the htb_destroy_class call below,
1634          * and surprisingly it worked in 2.4. But it must precede it,
1635          * because filters need their target class alive to be able to
1636          * call unbind_filter on it (without an Oops).
1637          */
1638         tcf_block_put(q->block);
1639
1640         for (i = 0; i < q->clhash.hashsize; i++) {
1641                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1642                         tcf_block_put(cl->block);
1643                         cl->block = NULL;
1644                 }
1645         }
1646
1647         do {
1648                 nonempty = false;
1649                 changed = false;
1650                 for (i = 0; i < q->clhash.hashsize; i++) {
1651                         hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1652                                                   common.hnode) {
1653                                 bool last_child;
1654
1655                                 if (!q->offload) {
1656                                         htb_destroy_class(sch, cl);
1657                                         continue;
1658                                 }
1659
1660                                 nonempty = true;
1661
1662                                 if (cl->level)
1663                                         continue;
1664
1665                                 changed = true;
1666
1667                                 last_child = htb_parent_last_child(cl);
1668                                 htb_destroy_class_offload(sch, cl, last_child,
1669                                                           true, NULL);
1670                                 qdisc_class_hash_remove(&q->clhash,
1671                                                         &cl->common);
1672                                 if (cl->parent)
1673                                         cl->parent->children--;
1674                                 if (last_child)
1675                                         htb_parent_to_leaf(sch, cl, NULL);
1676                                 htb_destroy_class(sch, cl);
1677                         }
1678                 }
1679         } while (changed);
1680         WARN_ON(nonempty);
1681
1682         qdisc_class_hash_destroy(&q->clhash);
1683         __qdisc_reset_queue(&q->direct_queue);
1684
1685         if (q->offload) {
1686                 offload_opt = (struct tc_htb_qopt_offload) {
1687                         .command = TC_HTB_DESTROY,
1688                 };
1689                 htb_offload(dev, &offload_opt);
1690         }
1691
1692         if (!q->direct_qdiscs)
1693                 return;
1694         for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1695                 qdisc_put(q->direct_qdiscs[i]);
1696         kfree(q->direct_qdiscs);
1697 }
1698
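/* Delete a class. Only classes with no children that are not still in use
 * (e.g. bound by filters) can be deleted; if the class is its parent's last
 * child, the parent is converted back into a leaf with a freshly created
 * default pfifo qdisc.
 */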
1699 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1700                       struct netlink_ext_ack *extack)
1701 {
1702         struct htb_sched *q = qdisc_priv(sch);
1703         struct htb_class *cl = (struct htb_class *)arg;
1704         struct Qdisc *new_q = NULL;
1705         int last_child = 0;
1706         int err;
1707
1708         /* TODO: why don't we allow deleting a subtree? References? Does
1709          * the tc subsystem guarantee that in htb_destroy it holds no class
1710          * refs, so that we can remove children safely there?
1711          */
1712         if (cl->children || qdisc_class_in_use(&cl->common)) {
1713                 NL_SET_ERR_MSG(extack, "HTB class in use");
1714                 return -EBUSY;
1715         }
1716
1717         if (!cl->level && htb_parent_last_child(cl))
1718                 last_child = 1;
1719
1720         if (q->offload) {
1721                 err = htb_destroy_class_offload(sch, cl, last_child, false,
1722                                                 extack);
1723                 if (err)
1724                         return err;
1725         }
1726
1727         if (last_child) {
1728                 struct netdev_queue *dev_queue = sch->dev_queue;
1729
1730                 if (q->offload)
1731                         dev_queue = htb_offload_get_queue(cl);
1732
1733                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1734                                           cl->parent->common.classid,
1735                                           NULL);
1736                 if (q->offload) {
1737                         if (new_q)
1738                                 htb_set_lockdep_class_child(new_q);
1739                         htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1740                 }
1741         }
1742
1743         sch_tree_lock(sch);
1744
1745         if (!cl->level)
1746                 qdisc_purge_queue(cl->leaf.q);
1747
1748         /* delete from hash and active lists; the rest is done in htb_destroy_class */
1749         qdisc_class_hash_remove(&q->clhash, &cl->common);
1750         if (cl->parent)
1751                 cl->parent->children--;
1752
1753         if (cl->prio_activity)
1754                 htb_deactivate(q, cl);
1755
1756         if (cl->cmode != HTB_CAN_SEND)
1757                 htb_safe_rb_erase(&cl->pq_node,
1758                                   &q->hlevel[cl->level].wait_pq);
1759
1760         if (last_child)
1761                 htb_parent_to_leaf(sch, cl, new_q);
1762
1763         sch_tree_unlock(sch);
1764
1765         htb_destroy_class(sch, cl);
1766         return 0;
1767 }
1768
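/* Create a new class or modify an existing one. New leaf classes get a
 * default pfifo qdisc; if the parent was a leaf, it is converted into an
 * inner node. With offload, the corresponding driver operations
 * (TC_HTB_LEAF_ALLOC_QUEUE, TC_HTB_LEAF_TO_INNER, TC_HTB_NODE_MODIFY) are
 * issued as well.
 */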
1769 static int htb_change_class(struct Qdisc *sch, u32 classid,
1770                             u32 parentid, struct nlattr **tca,
1771                             unsigned long *arg, struct netlink_ext_ack *extack)
1772 {
1773         int err = -EINVAL;
1774         struct htb_sched *q = qdisc_priv(sch);
1775         struct htb_class *cl = (struct htb_class *)*arg, *parent;
1776         struct tc_htb_qopt_offload offload_opt;
1777         struct nlattr *opt = tca[TCA_OPTIONS];
1778         struct nlattr *tb[TCA_HTB_MAX + 1];
1779         struct Qdisc *parent_qdisc = NULL;
1780         struct netdev_queue *dev_queue;
1781         struct tc_htb_opt *hopt;
1782         u64 rate64, ceil64;
1783         int warn = 0;
1784
1785         /* extract all subattrs from opt attr */
1786         if (!opt)
1787                 goto failure;
1788
1789         err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1790                                           extack);
1791         if (err < 0)
1792                 goto failure;
1793
1794         err = -EINVAL;
1795         if (tb[TCA_HTB_PARMS] == NULL)
1796                 goto failure;
1797
1798         parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1799
1800         hopt = nla_data(tb[TCA_HTB_PARMS]);
1801         if (!hopt->rate.rate || !hopt->ceil.rate)
1802                 goto failure;
1803
1804         if (q->offload) {
1805                 /* Options not supported by the offload. */
1806                 if (hopt->rate.overhead || hopt->ceil.overhead) {
1807                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1808                         goto failure;
1809                 }
1810                 if (hopt->rate.mpu || hopt->ceil.mpu) {
1811                         NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1812                         goto failure;
1813                 }
1814         }
1815
1816         /* Keep backward compatibility with rate_table based iproute2 tc */
1817         if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1818                 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1819                                               NULL));
1820
1821         if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1822                 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1823                                               NULL));
1824
1825         rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1826         ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1827
1828         if (!cl) {              /* new class */
1829                 struct net_device *dev = qdisc_dev(sch);
1830                 struct Qdisc *new_q, *old_q;
1831                 int prio;
1832                 struct {
1833                         struct nlattr           nla;
1834                         struct gnet_estimator   opt;
1835                 } est = {
1836                         .nla = {
1837                                 .nla_len        = nla_attr_size(sizeof(est.opt)),
1838                                 .nla_type       = TCA_RATE,
1839                         },
1840                         .opt = {
1841                                 /* 4s interval, 16s averaging constant */
1842                                 .interval       = 2,
1843                                 .ewma_log       = 2,
1844                         },
1845                 };
1846
1847                 /* check for valid classid */
1848                 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1849                     htb_find(classid, sch))
1850                         goto failure;
1851
1852                 /* check maximal depth */
1853                 if (parent && parent->parent && parent->parent->level < 2) {
1854                         NL_SET_ERR_MSG_MOD(extack, "tree is too deep");
1855                         goto failure;
1856                 }
1857                 err = -ENOBUFS;
1858                 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1859                 if (!cl)
1860                         goto failure;
1861
1862                 gnet_stats_basic_sync_init(&cl->bstats);
1863                 gnet_stats_basic_sync_init(&cl->bstats_bias);
1864
1865                 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1866                 if (err) {
1867                         kfree(cl);
1868                         goto failure;
1869                 }
1870                 if (htb_rate_est || tca[TCA_RATE]) {
1871                         err = gen_new_estimator(&cl->bstats, NULL,
1872                                                 &cl->rate_est,
1873                                                 NULL,
1874                                                 true,
1875                                                 tca[TCA_RATE] ? : &est.nla);
1876                         if (err)
1877                                 goto err_block_put;
1878                 }
1879
1880                 cl->children = 0;
1881                 RB_CLEAR_NODE(&cl->pq_node);
1882
1883                 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1884                         RB_CLEAR_NODE(&cl->node[prio]);
1885
1886                 cl->common.classid = classid;
1887
1888         /* Make sure nothing interrupts us between the two
1889          * ndo_setup_tc calls.
1890          */
1891                 ASSERT_RTNL();
1892
1893         /* Create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1894          * which can't be used inside sch_tree_lock
1895          * -- thanks to Karlis Peisenieks
1896          */
1897                 if (!q->offload) {
1898                         dev_queue = sch->dev_queue;
1899                 } else if (!(parent && !parent->level)) {
1900                         /* Assign a dev_queue to this classid. */
1901                         offload_opt = (struct tc_htb_qopt_offload) {
1902                                 .command = TC_HTB_LEAF_ALLOC_QUEUE,
1903                                 .classid = cl->common.classid,
1904                                 .parent_classid = parent ?
1905                                         TC_H_MIN(parent->common.classid) :
1906                                         TC_HTB_CLASSID_ROOT,
1907                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1908                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1909                                 .prio = hopt->prio,
1910                                 .quantum = hopt->quantum,
1911                                 .extack = extack,
1912                         };
1913                         err = htb_offload(dev, &offload_opt);
1914                         if (err) {
1915                                 NL_SET_ERR_MSG_WEAK(extack,
1916                                                     "Failed to offload TC_HTB_LEAF_ALLOC_QUEUE");
1917                                 goto err_kill_estimator;
1918                         }
1919                         dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1920                 } else { /* First child. */
1921                         dev_queue = htb_offload_get_queue(parent);
1922                         old_q = htb_graft_helper(dev_queue, NULL);
1923                         WARN_ON(old_q != parent->leaf.q);
1924                         offload_opt = (struct tc_htb_qopt_offload) {
1925                                 .command = TC_HTB_LEAF_TO_INNER,
1926                                 .classid = cl->common.classid,
1927                                 .parent_classid =
1928                                         TC_H_MIN(parent->common.classid),
1929                                 .rate = max_t(u64, hopt->rate.rate, rate64),
1930                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1931                                 .prio = hopt->prio,
1932                                 .quantum = hopt->quantum,
1933                                 .extack = extack,
1934                         };
1935                         err = htb_offload(dev, &offload_opt);
1936                         if (err) {
1937                                 NL_SET_ERR_MSG_WEAK(extack,
1938                                                     "Failed to offload TC_HTB_LEAF_TO_INNER");
1939                                 htb_graft_helper(dev_queue, old_q);
1940                                 goto err_kill_estimator;
1941                         }
1942                         _bstats_update(&parent->bstats_bias,
1943                                        u64_stats_read(&old_q->bstats.bytes),
1944                                        u64_stats_read(&old_q->bstats.packets));
1945                         qdisc_put(old_q);
1946                 }
1947                 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1948                                           classid, NULL);
1949                 if (q->offload) {
1950                         if (new_q) {
1951                                 htb_set_lockdep_class_child(new_q);
1952                                 /* One ref for cl->leaf.q, the other for
1953                                  * dev_queue->qdisc.
1954                                  */
1955                                 qdisc_refcount_inc(new_q);
1956                         }
1957                         old_q = htb_graft_helper(dev_queue, new_q);
1958                         /* No qdisc_put needed. */
1959                         WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1960                 }
1961                 sch_tree_lock(sch);
1962                 if (parent && !parent->level) {
1963                         /* turn parent into inner node */
1964                         qdisc_purge_queue(parent->leaf.q);
1965                         parent_qdisc = parent->leaf.q;
1966                         if (parent->prio_activity)
1967                                 htb_deactivate(q, parent);
1968
1969                         /* remove from evt list because of level change */
1970                         if (parent->cmode != HTB_CAN_SEND) {
1971                                 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1972                                 parent->cmode = HTB_CAN_SEND;
1973                         }
1974                         parent->level = (parent->parent ? parent->parent->level
1975                                          : TC_HTB_MAXDEPTH) - 1;
1976                         memset(&parent->inner, 0, sizeof(parent->inner));
1977                 }
1978
1979                 /* the leaf (that's us) needs an elementary qdisc */
1980                 cl->leaf.q = new_q ? new_q : &noop_qdisc;
1981                 if (q->offload)
1982                         cl->leaf.offload_queue = dev_queue;
1983
1984                 cl->parent = parent;
1985
1986                 /* set class to be in HTB_CAN_SEND state */
1987                 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1988                 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1989                 cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1990                 cl->t_c = ktime_get_ns();
1991                 cl->cmode = HTB_CAN_SEND;
1992
1993                 /* attach to the hash list and parent's family */
1994                 qdisc_class_hash_insert(&q->clhash, &cl->common);
1995                 if (parent)
1996                         parent->children++;
1997                 if (cl->leaf.q != &noop_qdisc)
1998                         qdisc_hash_add(cl->leaf.q, true);
1999         } else {
2000                 if (tca[TCA_RATE]) {
2001                         err = gen_replace_estimator(&cl->bstats, NULL,
2002                                                     &cl->rate_est,
2003                                                     NULL,
2004                                                     true,
2005                                                     tca[TCA_RATE]);
2006                         if (err)
2007                                 return err;
2008                 }
2009
2010                 if (q->offload) {
2011                         struct net_device *dev = qdisc_dev(sch);
2012
2013                         offload_opt = (struct tc_htb_qopt_offload) {
2014                                 .command = TC_HTB_NODE_MODIFY,
2015                                 .classid = cl->common.classid,
2016                                 .rate = max_t(u64, hopt->rate.rate, rate64),
2017                                 .ceil = max_t(u64, hopt->ceil.rate, ceil64),
2018                                 .prio = hopt->prio,
2019                                 .quantum = hopt->quantum,
2020                                 .extack = extack,
2021                         };
2022                         err = htb_offload(dev, &offload_opt);
2023                         if (err)
2024                                 /* The estimator was replaced, and rollback may fail
2025                                  * as well, so we don't try to recover it, and
2026                                  * the estimator won't work properly with the
2027                                  * offload anyway, because bstats are updated
2028                                  * only when the stats are queried.
2029                                  */
2030                                 return err;
2031                 }
2032
2033                 sch_tree_lock(sch);
2034         }
2035
2036         psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2037         psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2038
2039         /* There used to be a nasty bug here: we have to check that the node
2040          * is really a leaf before changing cl->leaf!
2041          */
2042         if (!cl->level) {
2043                 u64 quantum = cl->rate.rate_bytes_ps;
2044
2045                 do_div(quantum, q->rate2quantum);
2046                 cl->quantum = min_t(u64, quantum, INT_MAX);
2047
2048                 if (!hopt->quantum && cl->quantum < 1000) {
2049                         warn = -1;
2050                         cl->quantum = 1000;
2051                 }
2052                 if (!hopt->quantum && cl->quantum > 200000) {
2053                         warn = 1;
2054                         cl->quantum = 200000;
2055                 }
2056                 if (hopt->quantum)
2057                         cl->quantum = hopt->quantum;
2058                 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2059                         cl->prio = TC_HTB_NUMPRIO - 1;
2060         }
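	/* Illustrative example for the quantum chosen above (numbers assumed,
	 * not taken from this code): with rate 100mbit (12,500,000 bytes/s)
	 * and r2q left at the usual iproute2 default of 10, quantum becomes
	 * 12,500,000 / 10 = 1,250,000, which exceeds 200000, so it is clamped
	 * and the "Consider r2q change" warning below is emitted unless an
	 * explicit quantum was supplied.
	 */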
2061
2062         cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2063         cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2064
2065         sch_tree_unlock(sch);
2066         qdisc_put(parent_qdisc);
2067
2068         if (warn)
2069                 NL_SET_ERR_MSG_FMT_MOD(extack,
2070                                        "quantum of class %X is %s. Consider r2q change.",
2071                                        cl->common.classid, (warn == -1 ? "small" : "big"));
2072
2073         qdisc_class_hash_grow(sch, &q->clhash);
2074
2075         *arg = (unsigned long)cl;
2076         return 0;
2077
2078 err_kill_estimator:
2079         gen_kill_estimator(&cl->rate_est);
2080 err_block_put:
2081         tcf_block_put(cl->block);
2082         kfree(cl);
2083 failure:
2084         return err;
2085 }
2086
2087 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2088                                        struct netlink_ext_ack *extack)
2089 {
2090         struct htb_sched *q = qdisc_priv(sch);
2091         struct htb_class *cl = (struct htb_class *)arg;
2092
2093         return cl ? cl->block : q->block;
2094 }
2095
2096 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2097                                      u32 classid)
2098 {
2099         struct htb_class *cl = htb_find(classid, sch);
2100
2101         /*if (cl && !cl->level) return 0;
2102          * The line above used to be there to prevent attaching filters to
2103          * leaves. But at least the tc_index filter uses this just to get the
2104          * class for other reasons, so we have to allow it.
2105          * ----
2106          * 19.6.2002: As Werner explained, it is OK - bind_filter is just
2107          * another way to "lock" the class - unlike "get", this lock can
2108          * be broken by the class during destroy, IIUC.
2109          */
2110         if (cl)
2111                 qdisc_class_get(&cl->common);
2112         return (unsigned long)cl;
2113 }
2114
2115 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2116 {
2117         struct htb_class *cl = (struct htb_class *)arg;
2118
2119         qdisc_class_put(&cl->common);
2120 }
2121
2122 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2123 {
2124         struct htb_sched *q = qdisc_priv(sch);
2125         struct htb_class *cl;
2126         unsigned int i;
2127
2128         if (arg->stop)
2129                 return;
2130
2131         for (i = 0; i < q->clhash.hashsize; i++) {
2132                 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2133                         if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2134                                 return;
2135                 }
2136         }
2137 }
2138
2139 static const struct Qdisc_class_ops htb_class_ops = {
2140         .select_queue   =       htb_select_queue,
2141         .graft          =       htb_graft,
2142         .leaf           =       htb_leaf,
2143         .qlen_notify    =       htb_qlen_notify,
2144         .find           =       htb_search,
2145         .change         =       htb_change_class,
2146         .delete         =       htb_delete,
2147         .walk           =       htb_walk,
2148         .tcf_block      =       htb_tcf_block,
2149         .bind_tcf       =       htb_bind_filter,
2150         .unbind_tcf     =       htb_unbind_filter,
2151         .dump           =       htb_dump_class,
2152         .dump_stats     =       htb_dump_class_stats,
2153 };
2154
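/* Illustrative user-space usage (device name and rates are examples only):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20
 *   tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit ceil 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 30mbit ceil 100mbit
 *
 * These commands reach htb_init() and htb_change_class() above through the
 * netlink interface.
 */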
2155 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2156         .cl_ops         =       &htb_class_ops,
2157         .id             =       "htb",
2158         .priv_size      =       sizeof(struct htb_sched),
2159         .enqueue        =       htb_enqueue,
2160         .dequeue        =       htb_dequeue,
2161         .peek           =       qdisc_peek_dequeued,
2162         .init           =       htb_init,
2163         .attach         =       htb_attach,
2164         .reset          =       htb_reset,
2165         .destroy        =       htb_destroy,
2166         .dump           =       htb_dump,
2167         .owner          =       THIS_MODULE,
2168 };
2169
2170 static int __init htb_module_init(void)
2171 {
2172         return register_qdisc(&htb_qdisc_ops);
2173 }
2174 static void __exit htb_module_exit(void)
2175 {
2176         unregister_qdisc(&htb_qdisc_ops);
2177 }
2178
2179 module_init(htb_module_init)
2180 module_exit(htb_module_exit)
2181 MODULE_LICENSE("GPL");
2182 MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler");