/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
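/*	Illustrative configuration (not part of this file's interface):
	the qdisc is normally set up from user space via the iproute2
	"tc" front end; the values below are arbitrary examples and
	assume the tc-red syntax, e.g.

	    tc qdisc add dev eth0 parent 1:1 handle 10: red \
	        limit 400000 min 30000 max 90000 avpkt 1000 \
	        burst 55 ecn adaptive bandwidth 10Mbit probability 0.02

	"ecn" sets TC_RED_ECN and "adaptive" sets TC_RED_ADAPTATIVE in
	tc_red_qopt.flags, which are acted upon below.
 */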

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

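/* Enqueue: refresh the EWMA average queue size (qavg) from the child
 * qdisc's backlog, leave any idle period, and let red_action() decide
 * whether to pass, probabilistically mark, or hard mark the packet.
 * With TC_RED_ECN set, ECN-capable packets are CE-marked instead of
 * dropped (unless TC_RED_HARDDROP forces a drop at hard-mark time).
 */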
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

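/* Dequeue from the child qdisc; when the child runs empty, record the
 * start of an idle period so that qavg decays while the link is idle.
 */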
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

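/* Propagate the RED configuration to the underlying device.  If the
 * driver implements ndo_setup_tc for TC_SETUP_QDISC_RED, the min/max
 * thresholds (converted back from the Wlog-scaled internal form),
 * max_P and the ECN flag are handed to hardware; otherwise -EOPNOTSUPP
 * is returned and the qdisc runs purely in software.
 */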
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

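/* Parse TCA_RED_* netlink attributes and (re)configure the qdisc.
 * A non-zero limit creates a bfifo child qdisc that enforces the hard
 * byte limit; the RED parameters themselves are validated by
 * red_check_params() before anything is changed under the tree lock.
 */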
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;
	u8 *stab;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_put(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

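/* Adaptive RED: when TC_RED_ADAPTATIVE is set, re-tune max_P every
 * 500 ms via red_adaptative_algo() so that the marking probability
 * tracks the observed average queue length.  The root qdisc lock is
 * taken because the timer runs outside the normal enqueue/dequeue path.
 */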
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

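/* Dump path: report the current configuration and statistics over
 * netlink.  When the qdisc is offloaded, basic and extended statistics
 * are first refreshed from the hardware through ndo_setup_tc
 * (TC_RED_STATS / TC_RED_XSTATS) before being copied to user space.
 */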
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

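/* RED is exposed as a classful qdisc with a single, implicit class
 * (minor 1) wrapping the child qdisc, so graft/leaf/find/walk all
 * operate on that one slot.
 */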
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");