2 * net/sched/sch_netem.c Network emulator
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
9 * Many of the algorithms and ideas for this came from
10 * NIST Net which is not copyrighted.
12 * Authors: Stephen Hemminger <shemminger@osdl.org>
13 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/skbuff.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25 #include <linux/reciprocal_div.h>
26 #include <linux/rbtree.h>
28 #include <net/netlink.h>
29 #include <net/pkt_sched.h>
30 #include <net/inet_ecn.h>
34 /* Network Emulation Queuing algorithm.
35 ====================================
37 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
38 Network Emulation Tool"
39 [2] Luigi Rizzo, DummyNet for FreeBSD
41 ----------------------------------------------------------------
43 This started out as a simple way to delay outgoing packets to
44 test TCP but has grown to include most of the functionality
45 of a full blown network emulator like NISTnet. It can delay
46 packets and add random jitter (and correlation). The random
47 distribution can be loaded from a table as well to provide
48 normal, Pareto, or experimental curves. Packet loss,
49 duplication, and reordering can also be emulated.
51 This qdisc does not do classification that can be handled in
52 layering other disciplines. It does not need to do bandwidth
53 control either since that can be handled by using token
54 bucket or other rate control.
56 Correlated Loss Generator models
58 Added generation of correlated loss according to the
59 "Gilbert-Elliot" model, a 4-state markov model.
62 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
63 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
64 and intuitive loss model for packet networks and its implementation
65 in the Netem module in the Linux kernel", available in [1]
67 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
68 Fabio Ludovici <fabio.ludovici at yahoo.it>
71 struct netem_sched_data {
72 /* internal t(ime)fifo qdisc uses t_root and sch->limit */
73 struct rb_root t_root;
75 /* optional qdisc for classful handling (NULL at netem init) */
78 struct qdisc_watchdog watchdog;
80 psched_tdiff_t latency;
81 psched_tdiff_t jitter;
94 struct reciprocal_value cell_size_reciprocal;
100 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
114 TX_IN_GAP_PERIOD = 1,
117 LOST_IN_BURST_PERIOD,
125 /* Correlated Loss Generation models */
127 /* state of the Markov chain */
130 /* 4-states and Gilbert-Elliot models */
131 u32 a1; /* p13 for 4-states or p for GE */
132 u32 a2; /* p31 for 4-states or r for GE */
133 u32 a3; /* p32 for 4-states or h for GE */
134 u32 a4; /* p14 for 4-states or 1-k for GE */
135 u32 a5; /* p23 used only in 4-states */
140 /* Time stamp put into socket buffer control block
141 * Only valid when skbs are in our internal t(ime)fifo queue.
143 * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp,
144 * and skb->next & skb->prev are scratch space for a qdisc,
145 * we save skb->tstamp value in skb->cb[] before destroying it.
147 struct netem_skb_cb {
148 psched_time_t time_to_send;
153 static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
155 return container_of(rb, struct sk_buff, rbnode);
158 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
160 /* we assume we can use skb next/prev/tstamp as storage for rb_node */
161 qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
162 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
165 /* init_crandom - initialize correlated random number generator
166 * Use entropy source for initial seed.
168 static void init_crandom(struct crndstate *state, unsigned long rho)
171 state->last = prandom_u32();
174 /* get_crandom - correlated random number generator
175 * Next number depends on last value.
176 * rho is scaled to avoid floating point.
178 static u32 get_crandom(struct crndstate *state)
181 unsigned long answer;
183 if (state->rho == 0) /* no correlation */
184 return prandom_u32();
186 value = prandom_u32();
187 rho = (u64)state->rho + 1;
188 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
189 state->last = answer;
193 /* loss_4state - 4-state model loss generator
194 * Generates losses according to the 4-state Markov chain adopted in
195 * the GI (General and Intuitive) loss model.
197 static bool loss_4state(struct netem_sched_data *q)
199 struct clgstate *clg = &q->clg;
200 u32 rnd = prandom_u32();
203 * Makes a comparison between rnd and the transition
204 * probabilities outgoing from the current state, then decides the
205 * next state and if the next packet has to be transmitted or lost.
206 * The four states correspond to:
207 * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
208 * LOST_IN_BURST_PERIOD => isolated losses within a gap period
209 * LOST_IN_GAP_PERIOD => lost packets within a burst period
210 * TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period
212 switch (clg->state) {
213 case TX_IN_GAP_PERIOD:
215 clg->state = LOST_IN_BURST_PERIOD;
217 } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
218 clg->state = LOST_IN_GAP_PERIOD;
220 } else if (clg->a1 + clg->a4 < rnd) {
221 clg->state = TX_IN_GAP_PERIOD;
225 case TX_IN_BURST_PERIOD:
227 clg->state = LOST_IN_GAP_PERIOD;
230 clg->state = TX_IN_BURST_PERIOD;
234 case LOST_IN_GAP_PERIOD:
236 clg->state = TX_IN_BURST_PERIOD;
237 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
238 clg->state = TX_IN_GAP_PERIOD;
239 } else if (clg->a2 + clg->a3 < rnd) {
240 clg->state = LOST_IN_GAP_PERIOD;
244 case LOST_IN_BURST_PERIOD:
245 clg->state = TX_IN_GAP_PERIOD;
252 /* loss_gilb_ell - Gilbert-Elliot model loss generator
253 * Generates losses according to the Gilbert-Elliot loss model or
254 * its special cases (Gilbert or Simple Gilbert)
256 * Makes a comparison between random number and the transition
257 * probabilities outgoing from the current state, then decides the
258 * next state. A second random number is extracted and the comparison
259 * with the loss probability of the current state decides if the next
260 * packet will be transmitted or lost.
262 static bool loss_gilb_ell(struct netem_sched_data *q)
264 struct clgstate *clg = &q->clg;
266 switch (clg->state) {
268 if (prandom_u32() < clg->a1)
269 clg->state = BAD_STATE;
270 if (prandom_u32() < clg->a4)
274 if (prandom_u32() < clg->a2)
275 clg->state = GOOD_STATE;
276 if (prandom_u32() > clg->a3)
283 static bool loss_event(struct netem_sched_data *q)
285 switch (q->loss_model) {
287 /* Random packet drop 0 => none, ~0 => all */
288 return q->loss && q->loss >= get_crandom(&q->loss_cor);
291 /* 4state loss model algorithm (used also for GI model)
292 * Extracts a value from the markov 4 state loss generator,
293 * if it is 1 drops a packet and if needed writes the event in
296 return loss_4state(q);
299 /* Gilbert-Elliot loss model algorithm
300 * Extracts a value from the Gilbert-Elliot loss generator,
301 * if it is 1 drops a packet and if needed writes the event in
304 return loss_gilb_ell(q);
307 return false; /* not reached */
311 /* tabledist - return a pseudo-randomly distributed value with mean mu and
312 * std deviation sigma. Uses table lookup to approximate the desired
313 * distribution, and a uniformly-distributed pseudo-random source.
315 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
316 struct crndstate *state,
317 const struct disttable *dist)
326 rnd = get_crandom(state);
328 /* default uniform distribution */
330 return (rnd % (2*sigma)) - sigma + mu;
332 t = dist->table[rnd % dist->size];
333 x = (sigma % NETEM_DIST_SCALE) * t;
335 x += NETEM_DIST_SCALE/2;
337 x -= NETEM_DIST_SCALE/2;
339 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
342 static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
346 len += q->packet_overhead;
349 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
351 if (len > cells * q->cell_size) /* extra cell needed for remainder */
353 len = cells * (q->cell_size + q->cell_overhead);
356 ticks = (u64)len * NSEC_PER_SEC;
358 do_div(ticks, q->rate);
359 return PSCHED_NS2TICKS(ticks);
362 static void tfifo_reset(struct Qdisc *sch)
364 struct netem_sched_data *q = qdisc_priv(sch);
367 while ((p = rb_first(&q->t_root))) {
368 struct sk_buff *skb = netem_rb_to_skb(p);
370 rb_erase(p, &q->t_root);
377 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
379 struct netem_sched_data *q = qdisc_priv(sch);
380 psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
381 struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
387 skb = netem_rb_to_skb(parent);
388 if (tnext >= netem_skb_cb(skb)->time_to_send)
389 p = &parent->rb_right;
391 p = &parent->rb_left;
393 rb_link_node(&nskb->rbnode, parent, p);
394 rb_insert_color(&nskb->rbnode, &q->t_root);
398 /* netem can't properly corrupt a megapacket (like we get from GSO), so instead
399 * when we statistically choose to corrupt one, we instead segment it, returning
400 * the first packet to be corrupted, and re-enqueue the remaining frames
402 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
404 struct sk_buff *segs;
405 netdev_features_t features = netif_skb_features(skb);
407 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
409 if (IS_ERR_OR_NULL(segs)) {
410 qdisc_reshape_fail(skb, sch);
418 * Insert one skb into qdisc.
419 * Note: parent depends on return value to account for queue length.
420 * NET_XMIT_DROP: queue length didn't change.
421 * NET_XMIT_SUCCESS: one skb was queued.
423 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
425 struct netem_sched_data *q = qdisc_priv(sch);
426 /* We don't fill cb now as skb_unshare() may invalidate it */
427 struct netem_skb_cb *cb;
428 struct sk_buff *skb2;
429 struct sk_buff *segs = NULL;
430 unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
433 int rc = NET_XMIT_SUCCESS;
435 /* Do not fool qdisc_drop_all() */
438 /* Random duplication */
439 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
444 if (q->ecn && INET_ECN_set_ce(skb))
445 qdisc_qstats_drop(sch); /* mark packet */
450 qdisc_qstats_drop(sch);
452 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
455 /* If a delay is expected, orphan the skb. (orphaning usually takes
456 * place at TX completion time, so _before_ the link transit delay)
458 if (q->latency || q->jitter)
459 skb_orphan_partial(skb);
462 * If we need to duplicate packet, then re-insert at top of the
463 * qdisc tree, since parent queuer expects that only one
464 * skb will be queued.
466 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
467 struct Qdisc *rootq = qdisc_root_bh(sch);
468 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
471 rootq->enqueue(skb2, rootq);
472 q->duplicate = dupsave;
476 * Randomized packet corruption.
477 * Make copy if needed since we are modifying
478 * If packet is going to be hardware checksummed, then
479 * do it now in software before we mangle it.
481 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
482 if (skb_is_gso(skb)) {
483 segs = netem_segment(skb, sch);
485 return NET_XMIT_DROP;
493 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
494 (skb->ip_summed == CHECKSUM_PARTIAL &&
495 skb_checksum_help(skb))) {
496 rc = qdisc_drop(skb, sch);
500 skb->data[prandom_u32() % skb_headlen(skb)] ^=
501 1<<(prandom_u32() % 8);
504 if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
505 return qdisc_reshape_fail(skb, sch);
507 qdisc_qstats_backlog_inc(sch, skb);
509 cb = netem_skb_cb(skb);
510 if (q->gap == 0 || /* not doing reordering */
511 q->counter < q->gap - 1 || /* inside last reordering gap */
512 q->reorder < get_crandom(&q->reorder_cor)) {
514 psched_tdiff_t delay;
516 delay = tabledist(q->latency, q->jitter,
517 &q->delay_cor, q->delay_dist);
519 now = psched_get_time();
522 struct sk_buff *last;
524 if (!skb_queue_empty(&sch->q))
525 last = skb_peek_tail(&sch->q);
527 last = netem_rb_to_skb(rb_last(&q->t_root));
530 * Last packet in queue is reference point (now),
531 * calculate this time bonus and subtract
534 delay -= netem_skb_cb(last)->time_to_send - now;
535 delay = max_t(psched_tdiff_t, 0, delay);
536 now = netem_skb_cb(last)->time_to_send;
539 delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q);
542 cb->time_to_send = now + delay;
543 cb->tstamp_save = skb->tstamp;
545 tfifo_enqueue(skb, sch);
548 * Do re-ordering by putting one out of N packets at the front
551 cb->time_to_send = psched_get_time();
554 __skb_queue_head(&sch->q, skb);
555 sch->qstats.requeues++;
563 qdisc_skb_cb(segs)->pkt_len = segs->len;
564 last_len = segs->len;
565 rc = qdisc_enqueue(segs, sch);
566 if (rc != NET_XMIT_SUCCESS) {
567 if (net_xmit_drop_count(rc))
568 qdisc_qstats_drop(sch);
577 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
579 return NET_XMIT_SUCCESS;
582 static unsigned int netem_drop(struct Qdisc *sch)
584 struct netem_sched_data *q = qdisc_priv(sch);
587 len = qdisc_queue_drop(sch);
590 struct rb_node *p = rb_first(&q->t_root);
593 struct sk_buff *skb = netem_rb_to_skb(p);
595 rb_erase(p, &q->t_root);
599 qdisc_qstats_backlog_dec(sch, skb);
603 if (!len && q->qdisc && q->qdisc->ops->drop)
604 len = q->qdisc->ops->drop(q->qdisc);
606 qdisc_qstats_drop(sch);
611 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
613 struct netem_sched_data *q = qdisc_priv(sch);
617 if (qdisc_is_throttled(sch))
621 skb = __skb_dequeue(&sch->q);
623 qdisc_qstats_backlog_dec(sch, skb);
625 qdisc_unthrottled(sch);
626 qdisc_bstats_update(sch, skb);
629 p = rb_first(&q->t_root);
631 psched_time_t time_to_send;
633 skb = netem_rb_to_skb(p);
635 /* if more time remaining? */
636 time_to_send = netem_skb_cb(skb)->time_to_send;
637 if (time_to_send <= psched_get_time()) {
638 rb_erase(p, &q->t_root);
641 qdisc_qstats_backlog_dec(sch, skb);
644 skb->tstamp = netem_skb_cb(skb)->tstamp_save;
646 #ifdef CONFIG_NET_CLS_ACT
648 * If it's at ingress let's pretend the delay is
649 * from the network (tstamp will be updated).
651 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
652 skb->tstamp.tv64 = 0;
656 unsigned int pkt_len = qdisc_pkt_len(skb);
657 int err = qdisc_enqueue(skb, q->qdisc);
659 if (err != NET_XMIT_SUCCESS &&
660 net_xmit_drop_count(err)) {
661 qdisc_qstats_drop(sch);
662 qdisc_tree_reduce_backlog(sch, 1,
671 skb = q->qdisc->ops->dequeue(q->qdisc);
675 qdisc_watchdog_schedule(&q->watchdog, time_to_send);
679 skb = q->qdisc->ops->dequeue(q->qdisc);
686 static void netem_reset(struct Qdisc *sch)
688 struct netem_sched_data *q = qdisc_priv(sch);
690 qdisc_reset_queue(sch);
693 qdisc_reset(q->qdisc);
694 qdisc_watchdog_cancel(&q->watchdog);
697 static void dist_free(struct disttable *d)
703 * Distribution data is a variable size payload containing
704 * signed 16 bit values.
706 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
708 struct netem_sched_data *q = qdisc_priv(sch);
709 size_t n = nla_len(attr)/sizeof(__s16);
710 const __s16 *data = nla_data(attr);
711 spinlock_t *root_lock;
716 if (!n || n > NETEM_DIST_MAX)
719 s = sizeof(struct disttable) + n * sizeof(s16);
720 d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
727 for (i = 0; i < n; i++)
728 d->table[i] = data[i];
730 root_lock = qdisc_root_sleeping_lock(sch);
732 spin_lock_bh(root_lock);
733 swap(q->delay_dist, d);
734 spin_unlock_bh(root_lock);
740 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
742 const struct tc_netem_corr *c = nla_data(attr);
744 init_crandom(&q->delay_cor, c->delay_corr);
745 init_crandom(&q->loss_cor, c->loss_corr);
746 init_crandom(&q->dup_cor, c->dup_corr);
749 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
751 const struct tc_netem_reorder *r = nla_data(attr);
753 q->reorder = r->probability;
754 init_crandom(&q->reorder_cor, r->correlation);
757 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
759 const struct tc_netem_corrupt *r = nla_data(attr);
761 q->corrupt = r->probability;
762 init_crandom(&q->corrupt_cor, r->correlation);
765 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
767 const struct tc_netem_rate *r = nla_data(attr);
770 q->packet_overhead = r->packet_overhead;
771 q->cell_size = r->cell_size;
772 q->cell_overhead = r->cell_overhead;
774 q->cell_size_reciprocal = reciprocal_value(q->cell_size);
776 q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
779 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
781 const struct nlattr *la;
784 nla_for_each_nested(la, attr, rem) {
785 u16 type = nla_type(la);
788 case NETEM_LOSS_GI: {
789 const struct tc_netem_gimodel *gi = nla_data(la);
791 if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
792 pr_info("netem: incorrect gi model size\n");
796 q->loss_model = CLG_4_STATES;
798 q->clg.state = TX_IN_GAP_PERIOD;
807 case NETEM_LOSS_GE: {
808 const struct tc_netem_gemodel *ge = nla_data(la);
810 if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
811 pr_info("netem: incorrect ge model size\n");
815 q->loss_model = CLG_GILB_ELL;
816 q->clg.state = GOOD_STATE;
825 pr_info("netem: unknown loss type %u\n", type);
833 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
834 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
835 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
836 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
837 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
838 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
839 [TCA_NETEM_ECN] = { .type = NLA_U32 },
840 [TCA_NETEM_RATE64] = { .type = NLA_U64 },
843 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
844 const struct nla_policy *policy, int len)
846 int nested_len = nla_len(nla) - NLA_ALIGN(len);
848 if (nested_len < 0) {
849 pr_info("netem: invalid attributes len %d\n", nested_len);
853 if (nested_len >= nla_attr_size(0))
854 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
857 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
861 /* Parse netlink message to set options */
862 static int netem_change(struct Qdisc *sch, struct nlattr *opt)
864 struct netem_sched_data *q = qdisc_priv(sch);
865 struct nlattr *tb[TCA_NETEM_MAX + 1];
866 struct tc_netem_qopt *qopt;
867 struct clgstate old_clg;
868 int old_loss_model = CLG_RANDOM;
874 qopt = nla_data(opt);
875 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
879 /* backup q->clg and q->loss_model */
881 old_loss_model = q->loss_model;
883 if (tb[TCA_NETEM_LOSS]) {
884 ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
886 q->loss_model = old_loss_model;
890 q->loss_model = CLG_RANDOM;
893 if (tb[TCA_NETEM_DELAY_DIST]) {
894 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
896 /* recover clg and loss_model, in case of
897 * q->clg and q->loss_model were modified
901 q->loss_model = old_loss_model;
906 sch->limit = qopt->limit;
908 q->latency = qopt->latency;
909 q->jitter = qopt->jitter;
910 q->limit = qopt->limit;
913 q->loss = qopt->loss;
914 q->duplicate = qopt->duplicate;
916 /* for compatibility with earlier versions.
917 * if gap is set, need to assume 100% probability
922 if (tb[TCA_NETEM_CORR])
923 get_correlation(q, tb[TCA_NETEM_CORR]);
925 if (tb[TCA_NETEM_REORDER])
926 get_reorder(q, tb[TCA_NETEM_REORDER]);
928 if (tb[TCA_NETEM_CORRUPT])
929 get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
931 if (tb[TCA_NETEM_RATE])
932 get_rate(q, tb[TCA_NETEM_RATE]);
934 if (tb[TCA_NETEM_RATE64])
935 q->rate = max_t(u64, q->rate,
936 nla_get_u64(tb[TCA_NETEM_RATE64]));
938 if (tb[TCA_NETEM_ECN])
939 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
944 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
946 struct netem_sched_data *q = qdisc_priv(sch);
949 qdisc_watchdog_init(&q->watchdog, sch);
954 q->loss_model = CLG_RANDOM;
955 ret = netem_change(sch, opt);
957 pr_info("netem: change failed\n");
961 static void netem_destroy(struct Qdisc *sch)
963 struct netem_sched_data *q = qdisc_priv(sch);
965 qdisc_watchdog_cancel(&q->watchdog);
967 qdisc_destroy(q->qdisc);
968 dist_free(q->delay_dist);
971 static int dump_loss_model(const struct netem_sched_data *q,
976 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
978 goto nla_put_failure;
980 switch (q->loss_model) {
982 /* legacy loss model */
983 nla_nest_cancel(skb, nest);
984 return 0; /* no data */
987 struct tc_netem_gimodel gi = {
995 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
996 goto nla_put_failure;
1000 struct tc_netem_gemodel ge = {
1007 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
1008 goto nla_put_failure;
1013 nla_nest_end(skb, nest);
1017 nla_nest_cancel(skb, nest);
1021 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1023 const struct netem_sched_data *q = qdisc_priv(sch);
1024 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
1025 struct tc_netem_qopt qopt;
1026 struct tc_netem_corr cor;
1027 struct tc_netem_reorder reorder;
1028 struct tc_netem_corrupt corrupt;
1029 struct tc_netem_rate rate;
1031 qopt.latency = q->latency;
1032 qopt.jitter = q->jitter;
1033 qopt.limit = q->limit;
1034 qopt.loss = q->loss;
1036 qopt.duplicate = q->duplicate;
1037 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1038 goto nla_put_failure;
1040 cor.delay_corr = q->delay_cor.rho;
1041 cor.loss_corr = q->loss_cor.rho;
1042 cor.dup_corr = q->dup_cor.rho;
1043 if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
1044 goto nla_put_failure;
1046 reorder.probability = q->reorder;
1047 reorder.correlation = q->reorder_cor.rho;
1048 if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
1049 goto nla_put_failure;
1051 corrupt.probability = q->corrupt;
1052 corrupt.correlation = q->corrupt_cor.rho;
1053 if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
1054 goto nla_put_failure;
1056 if (q->rate >= (1ULL << 32)) {
1057 if (nla_put_u64(skb, TCA_NETEM_RATE64, q->rate))
1058 goto nla_put_failure;
1061 rate.rate = q->rate;
1063 rate.packet_overhead = q->packet_overhead;
1064 rate.cell_size = q->cell_size;
1065 rate.cell_overhead = q->cell_overhead;
1066 if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
1067 goto nla_put_failure;
1069 if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1070 goto nla_put_failure;
1072 if (dump_loss_model(q, skb) != 0)
1073 goto nla_put_failure;
1075 return nla_nest_end(skb, nla);
1078 nlmsg_trim(skb, nla);
1082 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
1083 struct sk_buff *skb, struct tcmsg *tcm)
1085 struct netem_sched_data *q = qdisc_priv(sch);
1087 if (cl != 1 || !q->qdisc) /* only one class */
1090 tcm->tcm_handle |= TC_H_MIN(1);
1091 tcm->tcm_info = q->qdisc->handle;
1096 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1099 struct netem_sched_data *q = qdisc_priv(sch);
1101 *old = qdisc_replace(sch, new, &q->qdisc);
1105 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
1107 struct netem_sched_data *q = qdisc_priv(sch);
1111 static unsigned long netem_get(struct Qdisc *sch, u32 classid)
1116 static void netem_put(struct Qdisc *sch, unsigned long arg)
1120 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
1122 if (!walker->stop) {
1123 if (walker->count >= walker->skip)
1124 if (walker->fn(sch, 1, walker) < 0) {
1132 static const struct Qdisc_class_ops netem_class_ops = {
1133 .graft = netem_graft,
1138 .dump = netem_dump_class,
1141 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1143 .cl_ops = &netem_class_ops,
1144 .priv_size = sizeof(struct netem_sched_data),
1145 .enqueue = netem_enqueue,
1146 .dequeue = netem_dequeue,
1147 .peek = qdisc_peek_dequeued,
1150 .reset = netem_reset,
1151 .destroy = netem_destroy,
1152 .change = netem_change,
1154 .owner = THIS_MODULE,
1158 static int __init netem_module_init(void)
1160 pr_info("netem: version " VERSION "\n");
1161 return register_qdisc(&netem_qdisc_ops);
1163 static void __exit netem_module_exit(void)
1165 unregister_qdisc(&netem_qdisc_ops);
1167 module_init(netem_module_init)
1168 module_exit(netem_module_exit)
1169 MODULE_LICENSE("GPL");