/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */

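/* Example configuration from user space (iproute2; the values here are
 * illustrative only, not recommendations):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 *
 * iproute2 derives Wlog, Plog and Scell_log from these values and passes
 * them down via the TCA_RED_PARMS and TCA_RED_STAB attributes.
 */
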
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

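/* Enqueue: update the EWMA average queue size from the child qdisc's
 * byte backlog and let red_action() compare it against the qth_min and
 * qth_max thresholds.  Below qth_min packets pass unmodified; between
 * the thresholds they are probabilistically ECN-marked, or dropped when
 * ECN is off or the packet is not ECN-capable; above qth_max they are
 * force-marked or force-dropped.  Accepted packets go to the child.
 */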
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

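/* Dequeue from the child qdisc.  When the child runs empty we enter the
 * "idle period" so that red_calc_qavg() can decay qavg for the time the
 * link sat idle (see the 990814 fix noted in the header).
 */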
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

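/* Install (TC_RED_REPLACE) or remove (TC_RED_DESTROY) the RED
 * configuration in hardware via ndo_setup_tc.  The thresholds are
 * converted back from their scaled fixed-point form by shifting out
 * Wlog.  Callers tolerate failure: the software path keeps working
 * whether or not the device accepts the offload.
 */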
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P]	= { .type = NLA_U32 },
};

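/* Parse and apply a new configuration.  TCA_RED_PARMS and TCA_RED_STAB
 * are mandatory; TCA_RED_MAX_P is optional, and when absent the drop
 * probability is derived from the legacy Plog field by red_set_parms().
 * If limit is non-zero, a fresh bfifo sized to it replaces the old
 * child under the tree lock.
 */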
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	unsigned char *stab;
	u32 max_P;
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab, max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ / 2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

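/* Adaptive RED: every 500ms nudge max_P up or down so that qavg settles
 * between the two thresholds (after Floyd, Gummadi and Shenker,
 * "Adaptive RED").  Runs under the root qdisc lock because it modifies
 * the live parameter set.
 */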
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ / 2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

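/* When the qdisc is offloaded, ask the driver to fold its hardware
 * byte/packet and queue counters into sch->bstats/qstats before
 * dumping, and track offload state in TCQ_F_OFFLOADED.  -EOPNOTSUPP
 * from the driver is not an error: stats simply stay software-only.
 */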
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

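/* RED is classful only in the narrow sense that it exposes its single
 * child qdisc as pseudo-class minor 1, so user space can graft a
 * different child and walk or dump it.
 */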
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");