/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"
/* Network Emulation Queuing algorithm.
   ====================================

   Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
	    Network Emulation Tool"
	    [2] Luigi Rizzo, "DummyNet for FreeBSD"

   ----------------------------------------------------------------

   This started out as a simple way to delay outgoing packets to
   test TCP but has grown to include most of the functionality
   of a full blown network emulator like NISTnet. It can delay
   packets and add random jitter (and correlation). The random
   distribution can be loaded from a table as well to provide
   normal, Pareto, or experimental curves. Packet loss,
   duplication, and reordering can also be emulated.

   This qdisc does not do classification; that can be handled by
   layering other disciplines. It does not need to do bandwidth
   control either, since that can be handled by using token
   bucket or other rate control.
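   For illustration, a typical iproute2 configuration exercising the
   features above (interface name is only an example):

	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% \
		loss 0.3% 25% duplicate 1% reorder 25% 50%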
   Correlated Loss Generator models

   Added generation of correlated loss according to the
   "Gilbert-Elliot" model, a 4-state Markov model.

   References:
   [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
   [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
   and intuitive loss model for packet networks and its implementation
   in the Netem module in the Linux kernel", available in [1]

   Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
	    Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
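/* The correlated loss models below are likewise reachable from iproute2;
 * the percentages here are illustrative only:
 *
 *	tc qdisc add dev eth0 root netem loss state 1% 10% 70% 0.1% 1%
 *	tc qdisc add dev eth0 root netem loss gemodel 1% 10% 70% 0.1%
 *
 * "loss state" takes p13 [p31 [p32 [p23 [p14]]]] for the 4-state model;
 * "loss gemodel" takes p [r [1-h [1-k]]] for Gilbert-Elliot.
 */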
struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32 size;
		s16 table[0];
	} *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	};

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	};

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-state and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-state or p for GE */
		u32 a2;	/* p31 for 4-state or r for GE */
		u32 a3;	/* p32 for 4-state or h for GE */
		u32 a4;	/* p14 for 4-state or 1-k for GE */
		u32 a5;	/* p23 used only in 4-state */
	} clg;
};
/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
	ktime_t		tstamp_save;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}
/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = prandom_u32();
}
/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return prandom_u32();

	value = prandom_u32();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
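/* The blend in get_crandom() is 32-bit fixed point: with correlation
 * c = rho / 2^32, each output is roughly c * last + (1 - c) * fresh
 * entropy, so e.g. rho ~= 0x80000000 gives half the weight to the
 * previous value. (Worked example for illustration only.)
 */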
/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}
		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}
		break;
	case LOST_IN_GAP_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		}
		break;
	case LOST_IN_BURST_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}
/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between a random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32() < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32() < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32() < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32() > clg->a3)
			return true;
		break;
	}

	return false;
}
static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * extracts a value from the 4-state Markov loss generator;
		 * if it is true, the packet is dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * extracts a value from the Gilbert-Elliot loss generator;
		 * if it is true, the packet is dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}
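/* All probabilities handled here are u32 fixed point: 0 means never and
 * ~0U means always, so e.g. a configured 1% arrives from userspace
 * pre-scaled to about 0x028F5C29 (1% of 2^32) and a plain unsigned
 * comparison against a random u32 implements the Bernoulli trial.
 * (Worked example for illustration only.)
 */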
/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma. Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2 * sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE / 2;
	else
		x -= NETEM_DIST_SCALE / 2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
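/* Worked example for the scaling in tabledist(): the table stores the
 * inverse CDF scaled by NETEM_DIST_SCALE (8192). sigma is split into
 * quotient and remainder before multiplying by t so the intermediate
 * product cannot overflow for large sigma, and the +/- NETEM_DIST_SCALE/2
 * term rounds to nearest instead of truncating. With sigma = 10ms and a
 * table entry t = 8192 (one standard deviation), the result is mu + 10ms.
 */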
static psched_time_t packet_len_2_sched_time(unsigned int len,
					     struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size) /* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
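/* Worked example: with rate = 125000 bytes/sec (1 Mbit/s), no packet
 * overhead and no cell framing, a 1500 byte packet yields
 * 1500 * NSEC_PER_SEC / 125000 = 12,000,000 ns = 12 ms of serialization
 * delay before conversion to scheduler ticks.
 */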
static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	while ((p = rb_first(&q->t_root))) {
		struct sk_buff *skb = rb_to_skb(p);

		rb_erase(p, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}
}
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color(&nskb->rbnode, &q->t_root);
	sch->q.qlen++;
}
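/* Note the ">=" above: an skb whose time_to_send ties an existing entry
 * goes to the right subtree, so packets with equal send times dequeue in
 * FIFO order and reordering only happens when explicitly requested.
 */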
/* netem can't properly corrupt a megapacket (like we get from GSO), so when
 * we statistically choose to corrupt one, we instead segment it, returning
 * the first packet to be corrupted, and re-enqueue the remaining frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}
static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
{
	skb->next = qh->head;

	if (!qh->head)
		qh->tail = skb;
	qh->head = skb;
	qh->qlen++;
}
/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	struct sk_buff *segs = NULL;
	unsigned int prev_len = qdisc_pkt_len(skb);
	int count = 1;
	int rc = NET_XMIT_SUCCESS;
	int rc_drop = NET_XMIT_DROP;
	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate the packet, then re-insert at top of the
	 * qdisc tree, since the parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root_bh(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		rc_drop = NET_XMIT_SUCCESS;
	}
	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (skb_is_gso(skb)) {
			segs = netem_segment(skb, sch, to_free);
			if (!segs)
				return rc_drop;
			qdisc_skb_cb(segs)->pkt_len = segs->len;
		} else {
			segs = skb;
		}

		skb = segs;
		segs = segs->next;

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			goto finish_segs;
		}

		skb->data[prandom_u32() % skb_headlen(skb)] ^=
			1<<(prandom_u32() % 8);
	}
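	/* The corruption itself is deliberately minimal: one random bit in
	 * one random byte of the linear header area (skb_headlen) is
	 * flipped, which is enough to break most checksums without
	 * touching paged data.
	 */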
	if (unlikely(sch->q.qlen >= sch->limit)) {
		qdisc_drop_all(skb, sch, to_free);
		return rc_drop;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send) {
					last = t_last;
				}
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(psched_tdiff_t, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		cb->tstamp_save = skb->tstamp;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		netem_enqueue_skb_head(&sch->q, skb);
		sch->qstats.requeues++;
	}
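	/* When q->rate is set, the branch above chains serialization
	 * delays: each packet's transmit time is measured from the last
	 * queued packet's time_to_send rather than from "now", so
	 * back-to-back packets drain at the configured rate instead of
	 * all maturing at once.
	 */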
finish_segs:
	if (segs) {
		unsigned int len, last_len;
		int nb = 0;

		len = skb ? skb->len : 0;

		while (segs) {
			struct sk_buff *seg2 = segs->next;

			segs->next = NULL;
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = seg2;
		}
		/* Parent qdiscs accounted for 1 skb of size @prev_len */
		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
	} else if (!skb) {
		return NET_XMIT_DROP;
	}
	return NET_XMIT_SUCCESS;
}
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	struct rb_node *p;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	p = rb_first(&q->t_root);
	if (p) {
		psched_time_t time_to_send;

		skb = rb_to_skb(p);

		/* is it time to send this packet? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (time_to_send <= psched_get_time()) {
			rb_erase(p, &q->t_root);

			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			skb->tstamp = netem_skb_cb(skb)->tstamp_save;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (skb->tc_redirected && skb->tc_from_ingress)
				skb->tstamp = 0;
#endif

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS &&
				    net_xmit_drop_count(err)) {
					qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1,
								  pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
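/* Dequeue order, in summary: reordered packets parked at the head queue
 * first, then any matured packet from the t_root rbtree, then the
 * optional child qdisc; if the earliest tfifo packet has not matured,
 * the qdisc watchdog is armed to fire at its time_to_send.
 */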
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable-size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;

	if (!n || n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}
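/* Note the swap()-under-lock pattern above: the new table is built with
 * no lock held, exchanged with the live pointer under the qdisc root
 * lock, and dist_free() then releases whichever table is unused. The
 * critical section is reduced to a single pointer swap.
 */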
static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}
static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}
static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}
static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}
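/* netem's TCA_OPTIONS block carries a fixed struct tc_netem_qopt first,
 * with optional netlink attributes packed after it; parse_attr() skips
 * the fixed-size header and parses only the trailing attribute stream.
 */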
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			return ret;
		}
	} else {
		q->loss_model = CLG_RANDOM;
	}

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret) {
			/* recover clg and loss_model, in case
			 * q->clg and q->loss_model were modified
			 * in get_loss_clg()
			 */
			q->clg = old_clg;
			q->loss_model = old_loss_model;
			return ret;
		}
	}

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions,
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	return ret;
}
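/* TCA_NETEM_RATE64 exists because struct tc_netem_rate carries only a
 * 32-bit rate in bytes/sec; userspace can send both attributes, and the
 * max_t() in netem_change() keeps the 64-bit value whenever it exceeds
 * the legacy field.
 */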
static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	qdisc_watchdog_init(&q->watchdog, sch);

	if (!opt)
		return -EINVAL;

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}
static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}
static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}
static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}
static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.find		=	netem_find,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");