 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the
   times determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All real intelligent work is done inside qdisc modules.
   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean that the queue is empty; it just means that
   the discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   a real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If the packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   ---peek

   like dequeue, but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
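/*
 * To make the callback contract above concrete, here is a minimal,
 * hypothetical Qdisc_ops sketch (not part of this file): a trivial
 * single-queue FIFO wired to the generic skb-list helpers. The
 * example_fifo_* names are illustrative only.
 *
 *	static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *					struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < qdisc_dev(sch)->tx_queue_len))
 *			return qdisc_enqueue_tail(skb, sch);
 *		return qdisc_drop(skb, sch, to_free);	// NET_XMIT_CN / _DROP
 *	}
 *
 *	static struct sk_buff *example_fifo_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);		// may return NULL
 *	}
 *
 *	static struct Qdisc_ops example_fifo_qdisc_ops __read_mostly = {
 *		.id		= "example_fifo",
 *		.priv_size	= 0,
 *		.enqueue	= example_fifo_enqueue,
 *		.dequeue	= example_fifo_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 */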
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);

/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/

/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
	struct Qdisc_ops *q, **qp;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	const struct Qdisc_class_ops *cops = qops->cl_ops;

	if (!(cops->find && cops->walk && cops->leaf))
	if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))

	write_unlock(&qdisc_mod_lock);
EXPORT_SYMBOL(register_qdisc);
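/*
 * Typical (hypothetical) usage from a qdisc module: register the ops
 * table at module load and unregister it on unload, mirroring what
 * in-tree schedulers such as sch_fifo do. example_fifo_qdisc_ops is
 * the illustrative ops table sketched above, not a real symbol.
 *
 *	static int __init example_fifo_module_init(void)
 *	{
 *		return register_qdisc(&example_fifo_qdisc_ops);
 *	}
 *
 *	static void __exit example_fifo_module_exit(void)
 *	{
 *		unregister_qdisc(&example_fifo_qdisc_ops);
 *	}
 *
 *	module_init(example_fifo_module_init);
 *	module_exit(example_fifo_module_exit);
 *	MODULE_LICENSE("GPL");
 */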
int unregister_qdisc(struct Qdisc_ops *qops)
	struct Qdisc_ops *q, **qp;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
	write_unlock(&qdisc_mod_lock);
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);

		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
late_initcall(sch_default_qdisc);
/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.)
 * Note: the caller either holds RTNL or rcu_read_lock()
 */
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)

void qdisc_hash_add(struct Qdisc *q, bool invisible)
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
			q->flags |= TCQ_F_INVISIBLE;
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		hash_del_rcu(&q->hash);
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
	q = qdisc_match_from_root(dev->qdisc, handle);

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	cl = cops->find(p, classid);
	leaf = cops->leaf(p, cl);

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
	struct Qdisc_ops *q = NULL;

	read_lock(&qdisc_mod_lock);
	for (q = qdisc_base; q; q = q->next) {
		if (nla_strcmp(kind, q->id) == 0) {
			if (!try_module_get(q->owner))
	read_unlock(&qdisc_mod_lock);
/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utilities, we detect the linklayer setting by checking whether the
 * rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding the mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing the two entries.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	return TC_LINKLAYER_ETHERNET;
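/*
 * Worked example (hypothetical numbers): with mpu = 0 and cell_log = 3
 * (one rate-table slot per 8 bytes), the code above computes
 *
 *	low       = roundup(0, 48)  = 0
 *	high      = roundup(1, 48)  = 48
 *	cell_low  = 0 >> 3          = 0
 *	cell_high = (48 >> 3) - 1   = 5
 *
 * On an ATM-aligned table every size from 1 to 48 bytes costs one
 * 53-byte cell, so rtab[0] == rtab[5] and TC_LINKLAYER_ATM is returned;
 * on a plain Ethernet table those two slots would normally differ.
 */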
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE)

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
		return ERR_PTR(-ENOMEM);

	memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

static void stab_kfree_rcu(struct rcu_head *head)
	kfree(container_of(head, struct qdisc_size_table, rcu));

void qdisc_put_stab(struct qdisc_size_table *tab)
	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
	nest = nla_nest_start(skb, TCA_STAB);
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;

	if (unlikely(pkt_len < 1))
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
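/*
 * Worked example (hypothetical size table): assume overhead = 0,
 * cell_align = 0, cell_log = 6, size_log = 0 and tsize = 512, i.e. one
 * table slot per 64 bytes of packet length. For skb->len = 1000:
 *
 *	slot    = 1000 >> 6 = 15
 *	pkt_len = stab->data[15]
 *
 * With an ATM stab, data[15] would typically hold the cell-padded size
 * (21 cells * 53 bytes = 1113), so the qdisc accounts for link-layer
 * framing instead of the raw 1000-byte skb length. Lengths beyond the
 * table reuse the last entry scaled by (slot / tsize), as coded above.
 */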
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,

	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))

	if (wd->last_expires == expires)

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
	hrtimer_cancel(&wd->timer);
EXPORT_SYMBOL(qdisc_watchdog_cancel);
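/*
 * Typical (hypothetical) use of the watchdog by a rate-limiting qdisc,
 * following the pattern of shapers such as sch_tbf: arm the timer from
 * dequeue when the head packet is not yet allowed to go out, and cancel
 * it on reset/destroy. "q" stands for the qdisc's private data.
 *
 *	// in .init
 *	qdisc_watchdog_init(&q->watchdog, sch);
 *
 *	// in .dequeue, when the next packet may only be sent at t_next (ns)
 *	qdisc_watchdog_schedule_ns(&q->watchdog, t_next);
 *	return NULL;
 *
 *	// in .reset / .destroy
 *	qdisc_watchdog_cancel(&q->watchdog);
 */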
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
	struct hlist_head *h;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
	nsize = clhash->hashsize * 2;

	nhash = qdisc_class_hash_alloc(nsize);

	ohash = clhash->hash;
	osize = clhash->hashsize;

	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
	kvfree(clhash->hash);
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
	hlist_del(&cl->hnode);
EXPORT_SYMBOL(qdisc_class_hash_remove);
/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
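/*
 * Handle layout refresher (illustrative): a handle packs the 16-bit
 * major number in the upper half and the 16-bit minor number in the
 * lower half, so TC_H_MAKE(0x80010000U, 0) == 0x80010000 is what tc
 * displays as "8001:". The allocator above advances the major part by
 * one per attempt (autohandle += TC_H_MAKE(0x10000U, 0)), handing out
 * 8001:, 8002:, ... and wrapping before it reaches TC_H_ROOT.
 */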
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
	const struct Qdisc_class_ops *cops;

	if (n == 0 && len == 0)
	drops = max_t(int, n, 0);
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))

		if (sch->flags & TCQ_F_NOPARENT)
		/* Notify parent qdisc only if child qdisc becomes empty.
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
			WARN_ON_ONCE(parentid != TC_H_ROOT);
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
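/*
 * Typical (hypothetical) caller: a child qdisc whose .change() shrank
 * its limit and had to drop packets must tell every ancestor how many
 * packets/bytes disappeared, so their qlen/backlog counters and the
 * active-class state stay consistent:
 *
 *	unsigned int qlen = sch->q.qlen, backlog = sch->qstats.backlog;
 *
 *	... drop packets from the internal queues ...
 *
 *	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
 *				  backlog - sch->qstats.backlog);
 */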
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct qdisc_size_table *stab;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
	if (q->flags & TCQ_F_BUILTIN)
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
		qdisc_notify(net, skb, n, clid, old, new);
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'.
 *
 * On success, destroy the old qdisc.
 */
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			if (!dev_ingress_queue(dev))

		if (dev->flags & IFF_UP)

		if (new && new->ops->attach)

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
				qdisc_refcount_inc(new);

			notify_and_destroy(net, skb, n, classid,
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;
			if (new && new->ops->attach)
				new->ops->attach(new);
			notify_and_destroy(net, skb, n, classid, old, new);

		if (dev->flags & IFF_UP)
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (new && new->ops == &noqueue_qdisc_ops)
			err = cops->graft(parent, cl, new, &old);
		notify_and_destroy(net, skb, n, classid, old, new);

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp)
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			request_module("sch_%s", name);

			ops = qdisc_lookup_ops(kind);
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);

	sch = qdisc_alloc(dev_queue, ops);

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
			handle = qdisc_alloc_handle(dev);
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;

	sch->handle = handle;

	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
				err = PTR_ERR(stab);
			rcu_assign_pointer(sch->stab, stab);
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			if (sch->flags & TCQ_F_MQROOT)

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,

		qdisc_hash_add(sch, false);

	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	kfree((char *) sch - sch->padded);
	module_put(ops->owner);
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
	struct qdisc_size_table *ostab, *stab = NULL;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
			return PTR_ERR(stab);

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
		gen_replace_estimator(&sch->bstats,
				      qdisc_root_sleeping_running(sch),

struct check_loop_arg {
	struct qdisc_walker	w;

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;

check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
		if (leaf == arg->p || arg->depth > 7)
		return check_loop(leaf, arg->p, arg->depth + 1);
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_NUL_STRING,
				    .len = IFNAMSIZ - 1 },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);

	clid = tcm->tcm_parent;

	if (clid != TC_H_ROOT) {
		if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
			p = qdisc_lookup(dev, TC_H_MAJ(clid));
			q = qdisc_leaf(p, clid);
		} else if (dev_ingress_queue(dev)) {
			q = dev_ingress_queue(dev)->qdisc_sleeping;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
		q = qdisc_lookup(dev, tcm->tcm_handle);

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))

	if (n->nlmsg_type == RTM_DELQDISC) {
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		qdisc_notify(net, skb, n, clid, NULL, q);
/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q, *p;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))

	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);

	if (clid != TC_H_ROOT) {
		if (clid != TC_H_INGRESS) {
			p = qdisc_lookup(dev, TC_H_MAJ(clid));
			q = qdisc_leaf(p, clid);
		} else if (dev_ingress_queue_create(dev)) {
			q = dev_ingress_queue(dev)->qdisc_sleeping;
		/* It may be the default qdisc; ignore it */
		if (q && q->handle == 0)

	if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
		if (tcm->tcm_handle) {
			if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
			if (TC_H_MIN(tcm->tcm_handle))
			q = qdisc_lookup(dev, tcm->tcm_handle);
				goto create_n_graft;
			if (n->nlmsg_flags & NLM_F_EXCL)
			if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
			    (p && check_loop(q, p, 0)))
			qdisc_refcount_inc(q);

			goto create_n_graft;
			/* This magic test requires explanation.
			 *
			 *   We know that some child q is already
			 *   attached to this parent and we have a choice:
			 *   either to change it or to create/graft a new one.
			 *
			 *   1. We are allowed to create/graft only
			 *   if CREATE and REPLACE flags are set.
			 *
			 *   2. If EXCL is set, the requestor wanted to say
			 *   that the qdisc tcm_handle is not expected
			 *   to exist, so we choose create/graft too.
			 *
			 *   3. The last case is when no flags are set.
			 *   Alas, it is a sort of hole in the API; we
			 *   cannot decide what to do unambiguously.
			 *   For now we select create/graft if the user
			 *   gave a KIND which does not match the existing one.
			 */
			if ((n->nlmsg_flags & NLM_F_CREATE) &&
			    (n->nlmsg_flags & NLM_F_REPLACE) &&
			    ((n->nlmsg_flags & NLM_F_EXCL) ||
			     nla_strcmp(tca[TCA_KIND], q->ops->id))))
				goto create_n_graft;
	if (!tcm->tcm_handle)
	q = qdisc_lookup(dev, tcm->tcm_handle);

	/* Change qdisc parameters */
	if (n->nlmsg_flags & NLM_F_EXCL)
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
	err = qdisc_change(q, tca);
		qdisc_notify(net, skb, n, clid, NULL, q);

	if (!(n->nlmsg_flags & NLM_F_CREATE))
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
			dev_queue = p->dev_queue;
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,

	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
	int ret = 0, q_idx = *q_idx_p;

	if (q_idx < s_q_idx) {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct tcmsg *tcm = nlmsg_data(nlh);
	struct nlattr *tca[TCA_MAX + 1];

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX,
			  rtm_tca_policy, NULL);

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)

		dev_queue = dev_ingress_queue(dev);
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)

	cb->args[1] = q_idx;
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  u32 portid, u32 seq, u16 flags, int event)
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;

	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {

	err = cops->delete(q, cl);

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		tp->ops->bind_class(n, a->classid, a->cl);
		tcf_tree_unlock(tp);

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	cl = cops->find(q, portid);
	if (!cops->tcf_block)
	block = cops->tcf_block(q, cl);
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			tp->ops->walk(tp, &arg.w);

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
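	/*
	 * Worked example (hypothetical request): tcm_parent = 0x00010000
	 * and tcm_handle = 0x00010010 correspond to "parent 1:" and
	 * "classid 1:10" in tc syntax (tc parses both halves as hex).
	 * Step 1 below derives qid = 1:0 from either field, then class
	 * 1:10 is looked up (or created) inside qdisc 1:. A request with
	 * tcm_handle = 0 instead asks for a handle to be generated.
	 */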
	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		/* If both majors are known, they must be identical. */
	} else if (qid == 0)
		qid = dev->qdisc->handle;

	/* Now qid is a genuine qdisc handle consistent with
	 * both parent and child.
	 *
	 * TC_H_MAJ(portid) may still be unspecified; complete it now.
	 */
		portid = TC_H_MAKE(qid, portid);
		qid = dev->qdisc->handle;

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	/* And check that it supports classes */
	cops = q->ops->cl_ops;

	/* Now try to get class */
	if (portid == TC_H_ROOT)

	clid = TC_H_MAKE(qid, clid);

		cl = cops->find(q, clid);

			if (n->nlmsg_type != RTM_NEWTCLASS ||
			    !(n->nlmsg_flags & NLM_F_CREATE))

	switch (n->nlmsg_type) {
		if (n->nlmsg_flags & NLM_F_EXCL)

		err = tclass_del_notify(net, cops, skb, n, q, cl);
		/* Unbind the deleted class from its filters (rebind them to 0) */
		tc_bind_tclass(q, portid, clid, 0);
		err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);

	err = cops->change(q, clid, portid, tca, &new_cl);
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; we need to do the reverse binding. */
			tc_bind_tclass(q, portid, clid, new_cl);
struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {

		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.w.skip = cb->args[1];
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)

	if (!qdisc_dev(root) || !recur)

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))

	dev = dev_get_by_index(net, tcm->tcm_ifindex);

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)

	dev_queue = dev_ingress_queue(dev);
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t, false) < 0)
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

static int psched_open(struct inode *inode, struct file *file)
	return single_open(file, psched_show, NULL);

static const struct file_operations psched_fops = {
	.owner		= THIS_MODULE,
	.open		= psched_open,
	.llseek		= seq_lseek,
	.release	= single_release,

static int __net_init psched_net_init(struct net *net)
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);

static void __net_exit psched_net_exit(struct net *net)
	remove_proc_entry("psched", net->proc_net);

static int __net_init psched_net_init(struct net *net)

static void __net_exit psched_net_exit(struct net *net)

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,

static int __init pktsched_init(void)
	err = register_pernet_subsys(&psched_net_ops);
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,

subsys_initcall(pktsched_init);