2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41 only one "Generalized Port Identifier". So for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
/* Top-level session table.  rsvp_classify() indexes it with the 8-bit
 * value returned by hash_dst(); every slot heads an RCU-linked list of
 * sessions chained through rsvp_session.next.
 */
73 struct rsvp_session __rcu *ht[256];
/* Next session chained in the same top-level bucket. */
78 struct rsvp_session __rcu *next;
/* Destination address matched by this session (RSVP_DST_LEN 32-bit words). */
79 __be32 dst[RSVP_DST_LEN];
/* Destination port ID; its key and offset are applied to the transport
 * header in rsvp_classify(), and its mask orders sessions on insertion
 * in rsvp_change().
 */
80 struct tc_rsvp_gpi dpi;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
/* Next filter chained in the same per-session bucket. */
90 struct rsvp_filter __rcu *next;
/* Source address matched by this filter (RSVP_DST_LEN 32-bit words). */
91 __be32 src[RSVP_DST_LEN];
/* Source port ID; mask, key and offset are applied to the transport
 * header in rsvp_classify().
 */
92 struct tc_rsvp_gpi spi;
/* Classification result.  rsvp_classify() reads res.classid as the
 * tunnel ID when tunnelhdr is non-zero.
 */
95 struct tcf_result res;
/* Session this filter belongs to; read back by rsvp_delete(). */
99 struct rsvp_session *sess;
/* Work item queued from the RCU callback to finish destruction. */
101 struct work_struct work;
/* Hash a destination address for the top-level session table.
 *
 * h is seeded from the last 32-bit word of dst; protocol and tunnelid are
 * XORed in and the result is masked to 8 bits (0..255).  Any folding of h
 * between the two visible statements is not shown in this extract.
 */
106 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
108 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
112 return (h ^ protocol ^ tunnelid) & 0xFF;
/* Hash a source address for a session's filter table; rsvp_change()
 * stores the result in h2.  h is seeded from the last 32-bit word of src;
 * the remaining steps and the return statement are not visible in this
 * extract.
 */
115 static inline unsigned int hash_src(__be32 *src)
117 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
/* Helper macro: runs the filter's extensions through tcf_exts_exec() and
 * keeps the verdict in a local r.  It relies on skb, f and res being in
 * scope at the expansion site.  How r is acted upon is not visible in
 * this extract.
 */
125 #define RSVP_APPLY_RESULT() \
127 int r = tcf_exts_exec(skb, &f->exts, res); \
/* Classify skb against the session/filter tables rooted at tp->root.
 *
 * The network header is pulled, then dst/protocol/tunnelid are hashed
 * with hash_dst() to pick a session bucket.  A session matches on
 * destination address, protocol, DPI and tunnel ID.  Inside a matching
 * session the filters in bucket h2 are examined first and the wildcard
 * bucket (slot 16) afterwards.  A filter with a non-zero tunnelhdr
 * supplies the tunnel ID through res.classid and repositions nhptr
 * relative to the transport header (see NOTE 3 in the file header).
 *
 * RSVP_DST_LEN == 4 selects the IPv6 header layout; the other branch
 * reads IPv4 header fields (protocol, ihl, fragment test).
 *
 * Return values are not visible in this extract.
 *
 * NOTE(review): the DPI/SPI tests read *(u32 *)(xprt + offset) with an
 * offset taken from the filter.  No bounds or alignment check on that
 * read is visible here - confirm the offsets are validated when the
 * filter is installed.
 */
134 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
135 struct tcf_result *res)
137 struct rsvp_head *head = rcu_dereference_bh(tp->root);
138 struct rsvp_session *s;
139 struct rsvp_filter *f;
145 #if RSVP_DST_LEN == 4
146 struct ipv6hdr *nhptr;
148 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
150 nhptr = ipv6_hdr(skb);
154 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
160 #if RSVP_DST_LEN == 4
161 src = &nhptr->saddr.s6_addr32[0];
162 dst = &nhptr->daddr.s6_addr32[0];
163 protocol = nhptr->nexthdr;
164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
168 protocol = nhptr->protocol;
169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170 if (ip_is_fragment(nhptr))
/* Select the session bucket for this (dst, protocol, tunnelid). */
174 h1 = hash_dst(dst, protocol, tunnelid);
/* Walk every session chained in bucket h1. */
177 for (s = rcu_dereference_bh(head->ht[h1]); s;
178 s = rcu_dereference_bh(s->next)) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol &&
182 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183 #if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] &&
186 dst[2] == s->dst[2] &&
188 tunnelid == s->tunnelid) {
190 for (f = rcu_dereference_bh(s->ht[h2]); f;
191 f = rcu_dereference_bh(f->next)) {
192 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
193 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
194 #if RSVP_DST_LEN == 4
196 src[0] == f->src[0] &&
197 src[1] == f->src[1] &&
205 if (f->tunnelhdr == 0)
208 tunnelid = f->res.classid;
209 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
214 /* And wildcard bucket... */
215 for (f = rcu_dereference_bh(s->ht[16]); f;
216 f = rcu_dereference_bh(f->next)) {
/* Put filter n in the place of the existing filter whose handle is h.
 *
 * The low byte of h selects the session bucket (h1) and the next byte the
 * filter bucket (h2).  When the node with a matching handle is found, n
 * inherits its next pointer and is then published into the chain with
 * rcu_assign_pointer().
 *
 * NOTE(review): the inner loop has no termination condition in the lines
 * shown, so pins->handle would be read through a NULL pins if a session
 * in bucket h1 does not hold the handle - confirm.
 */
227 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
229 struct rsvp_head *head = rtnl_dereference(tp->root);
230 struct rsvp_session *s;
231 struct rsvp_filter __rcu **ins;
232 struct rsvp_filter *pins;
233 unsigned int h1 = h & 0xFF;
234 unsigned int h2 = (h >> 8) & 0xFF;
236 for (s = rtnl_dereference(head->ht[h1]); s;
237 s = rtnl_dereference(s->next)) {
238 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
239 ins = &pins->next, pins = rtnl_dereference(*ins)) {
240 if (pins->handle == h) {
241 RCU_INIT_POINTER(n->next, pins->next);
242 rcu_assign_pointer(*ins, n);
248 /* Something went wrong if we are trying to replace a non-existent
249 * node. Might as well halt instead of silently failing.
/* Look up the filter that carries the given handle.
 *
 * The low byte of handle selects the session bucket (h1) and the next
 * byte the filter slot (h2).  Every session in bucket h1 is walked and
 * the filters in its slot h2 are compared on f->handle.  The return
 * statements are not visible in this extract.
 *
 * NOTE(review): s->ht has 17 slots while h2 can be as large as 255; a
 * bound check on h2 is not visible in the lines shown - confirm.
 */
254 static void *rsvp_get(struct tcf_proto *tp, u32 handle)
256 struct rsvp_head *head = rtnl_dereference(tp->root);
257 struct rsvp_session *s;
258 struct rsvp_filter *f;
259 unsigned int h1 = handle & 0xFF;
260 unsigned int h2 = (handle >> 8) & 0xFF;
265 for (s = rtnl_dereference(head->ht[h1]); s;
266 s = rtnl_dereference(s->next)) {
267 for (f = rtnl_dereference(s->ht[h2]); f;
268 f = rtnl_dereference(f->next)) {
269 if (f->handle == handle)
/* Allocate a zeroed rsvp_head and publish it as tp->root.
 * The allocation-failure handling and the return value are not visible
 * in this extract.
 */
276 static int rsvp_init(struct tcf_proto *tp)
278 struct rsvp_head *data;
280 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
282 rcu_assign_pointer(tp->root, data);
/* Release the extension state attached to filter f: destroy the exts and
 * drop the reference taken with tcf_exts_get_net() (see
 * rsvp_delete_filter()).  Freeing of f itself is not visible in this
 * extract.
 */
288 static void __rsvp_delete_filter(struct rsvp_filter *f)
290 tcf_exts_destroy(&f->exts);
291 tcf_exts_put_net(&f->exts);
/* Work handler: recover the filter that embeds this work item and finish
 * its destruction via __rsvp_delete_filter().  Any locking around that
 * call is not visible in this extract.
 */
295 static void rsvp_delete_filter_work(struct work_struct *work)
297 struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
300 __rsvp_delete_filter(f);
/* RCU callback: recover the filter that embeds this rcu_head and hand the
 * rest of the teardown to rsvp_delete_filter_work() through
 * tcf_queue_work().
 */
304 static void rsvp_delete_filter_rcu(struct rcu_head *head)
306 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
308 INIT_WORK(&f->work, rsvp_delete_filter_work);
309 tcf_queue_work(&f->work);
/* Unbind filter f from its class and dispose of it.
 *
 * When tcf_exts_get_net() succeeds the teardown is deferred through
 * call_rcu(); __rsvp_delete_filter() is the direct path.  The keyword
 * joining the two branches is not visible in this extract (presumably an
 * else - confirm).
 */
312 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
314 tcf_unbind_filter(tp, &f->res);
315 /* all classifiers are required to call tcf_exts_destroy() after rcu
316 * grace period, since converted-to-rcu actions are relying on that
317 * in cleanup() callback
319 if (tcf_exts_get_net(&f->exts))
320 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
322 __rsvp_delete_filter(f);
/* Tear down every session and filter owned by tp.
 *
 * Each of the 256 top-level buckets is drained by repeatedly popping its
 * head session; for each session all 17 filter slots are drained the same
 * way and every filter is passed to rsvp_delete_filter().  The head
 * structure is released with kfree_rcu().  How the sessions themselves
 * are freed is not visible in this extract.
 */
325 static void rsvp_destroy(struct tcf_proto *tp)
327 struct rsvp_head *data = rtnl_dereference(tp->root);
333 for (h1 = 0; h1 < 256; h1++) {
334 struct rsvp_session *s;
336 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
337 RCU_INIT_POINTER(data->ht[h1], s->next);
339 for (h2 = 0; h2 <= 16; h2++) {
340 struct rsvp_filter *f;
342 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
343 rcu_assign_pointer(s->ht[h2], f->next);
344 rsvp_delete_filter(tp, f);
350 kfree_rcu(data, rcu);
/* Remove the filter passed in arg from its session.
 *
 * The filter slot is taken from bits 8..15 of the handle.  The chain is
 * walked with a pointer-to-pointer so the filter can be unlinked in place
 * and handed to rsvp_delete_filter().  A scan over all 17 slots precedes
 * the "session has no flows" path, which unlinks the session from the
 * top-level bucket selected by the low byte of the handle.  The final
 * loop inspects all 256 top-level buckets for a remaining session,
 * presumably to report through *last - the assignment is not visible in
 * this extract.
 */
353 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last)
355 struct rsvp_head *head = rtnl_dereference(tp->root);
356 struct rsvp_filter *nfp, *f = arg;
357 struct rsvp_filter __rcu **fp;
358 unsigned int h = f->handle;
359 struct rsvp_session __rcu **sp;
360 struct rsvp_session *nsp, *s = f->sess;
363 fp = &s->ht[(h >> 8) & 0xFF];
364 for (nfp = rtnl_dereference(*fp); nfp;
365 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
367 RCU_INIT_POINTER(*fp, f->next);
368 rsvp_delete_filter(tp, f);
372 for (i = 0; i <= 16; i++)
376 /* OK, session has no flows */
377 sp = &head->ht[h & 0xFF];
378 for (nsp = rtnl_dereference(*sp); nsp;
379 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
381 RCU_INIT_POINTER(*sp, s->next);
393 for (h1 = 0; h1 < 256; h1++) {
394 if (rcu_access_pointer(head->ht[h1])) {
/* Produce an unused filter handle that has salt in its low bits.
 *
 * data->hgenerator advances in steps of 0x10000 and restarts at 0x10000
 * when it wraps to zero.  The candidate is hgenerator | salt and is
 * accepted once rsvp_get() finds no filter with that handle.  The retry
 * bound and the failure return are not visible in this extract;
 * rsvp_change() treats a result of 0 as failure.
 */
403 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
405 struct rsvp_head *data = rtnl_dereference(tp->root);
411 if ((data->hgenerator += 0x10000) == 0)
412 data->hgenerator = 0x10000;
413 h = data->hgenerator|salt;
414 if (!rsvp_get(tp, h))
/* Test the bit for data->tgenerator in the data->tmap bitmap.
 * n is the index of the 32-bit word and b the bit inside that word.
 * What happens after the test (presumably setting the bit and reporting
 * whether it was free - confirm) is not visible in this extract.
 */
420 static int tunnel_bts(struct rsvp_head *data)
422 int n = data->tgenerator >> 5;
423 u32 b = 1 << (data->tgenerator & 0x1F);
425 if (data->tmap[n] & b)
/* Recompute the tunnel ID bitmap from the filters currently installed.
 *
 * A local tmap is cleared, then every filter of every session is visited.
 * For filters that pass the tunnelhdr test, data->tgenerator is loaded
 * with the filter's res.classid.  Finally the local tmap is copied over
 * data->tmap.  The statements that follow the tunnelhdr test and the
 * tgenerator assignment are not visible in this extract.
 */
431 static void tunnel_recycle(struct rsvp_head *data)
433 struct rsvp_session __rcu **sht = data->ht;
437 memset(tmap, 0, sizeof(tmap));
439 for (h1 = 0; h1 < 256; h1++) {
440 struct rsvp_session *s;
441 for (s = rtnl_dereference(sht[h1]); s;
442 s = rtnl_dereference(s->next)) {
443 for (h2 = 0; h2 <= 16; h2++) {
444 struct rsvp_filter *f;
446 for (f = rtnl_dereference(s->ht[h2]); f;
447 f = rtnl_dereference(f->next)) {
448 if (f->tunnelhdr == 0)
450 data->tgenerator = f->res.classid;
457 memcpy(data->tmap, tmap, sizeof(tmap));
/* Allocate a tunnel ID.
 *
 * Up to two passes (k) are made, each trying up to 255 candidates.
 * data->tgenerator is pre-incremented and the value 0 is skipped; the
 * current value is returned as soon as tunnel_bts() reports success.
 * tunnel_recycle() follows the inner loop.  The return value when no ID
 * is found is not visible in this extract; rsvp_change() treats 0 as
 * failure.
 */
460 static u32 gen_tunnel(struct rsvp_head *data)
464 for (k = 0; k < 2; k++) {
465 for (i = 255; i > 0; i--) {
466 if (++data->tgenerator == 0)
467 data->tgenerator = 1;
468 if (tunnel_bts(data))
469 return data->tgenerator;
471 tunnel_recycle(data);
/* Netlink attribute policy used by rsvp_change() when parsing the nested
 * options: CLASSID is a u32, DST and SRC are sized as RSVP_DST_LEN 32-bit
 * words, and PINFO is sized as struct tc_rsvp_pinfo.
 */
476 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
477 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
478 [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
479 [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
480 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
/* Create a filter or update an existing one from netlink attributes.
 *
 * The nested options are parsed against rsvp_policy and the extensions
 * are validated into a temporary tcf_exts (e).
 *
 * Existing node: the filter is duplicated with kmemdup(), its classid is
 * optionally replaced and re-bound, the validated extensions are moved in
 * with tcf_exts_change(), and the copy is swapped in via rsvp_replace().
 *
 * New node: TCA_RSVP_DST is tested for presence.  A filter is allocated
 * and filled from the SRC, PINFO and CLASSID attributes; a handle is
 * generated from h1 | (h2 << 8).  The classid range test and the
 * gen_tunnel() call deal with tunnel IDs; their enclosing condition is
 * not visible in this extract.  The filter is linked into a matching
 * session, or into a newly allocated one when none matches.  Filters and
 * sessions are inserted in an order determined by their spi/dpi masks.
 *
 * Parameters on signature lines missing from this extract (e.g. handle,
 * tca) are used below but not shown.  Error codes other than the visible
 * -EINVAL are not visible either.
 *
 * NOTE(review): no range check on the offsets carried in PINFO is visible
 * here, yet rsvp_classify() uses dpi/spi offsets to index packet data -
 * confirm they are validated.
 */
483 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
484 struct tcf_proto *tp, unsigned long base,
487 void **arg, bool ovr)
489 struct rsvp_head *data = rtnl_dereference(tp->root);
490 struct rsvp_filter *f, *nfp;
491 struct rsvp_filter __rcu **fp;
492 struct rsvp_session *nsp, *s;
493 struct rsvp_session __rcu **sp;
494 struct tc_rsvp_pinfo *pinfo = NULL;
495 struct nlattr *opt = tca[TCA_OPTIONS];
496 struct nlattr *tb[TCA_RSVP_MAX + 1];
503 return handle ? -EINVAL : 0;
505 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
509 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
512 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
518 /* Node exists: adjust only classid */
519 struct rsvp_filter *n;
521 if (f->handle != handle && handle)
524 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
530 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
536 if (tb[TCA_RSVP_CLASSID]) {
537 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
538 tcf_bind_filter(tp, &n->res, base);
541 tcf_exts_change(&n->exts, &e);
542 rsvp_replace(tp, n, handle);
546 /* Now more serious part... */
550 if (tb[TCA_RSVP_DST] == NULL)
554 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
558 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
562 if (tb[TCA_RSVP_SRC]) {
563 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
564 h2 = hash_src(f->src);
566 if (tb[TCA_RSVP_PINFO]) {
567 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
569 f->tunnelhdr = pinfo->tunnelhdr;
571 if (tb[TCA_RSVP_CLASSID])
572 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
574 dst = nla_data(tb[TCA_RSVP_DST]);
/* Without PINFO, protocol and tunnelid are hashed as 0. */
575 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
/* The handle encodes both bucket indexes: h1 in bits 0..7, h2 in bits 8..15. */
578 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
583 if (f->res.classid > 255)
587 if (f->res.classid == 0 &&
588 (f->res.classid = gen_tunnel(data)) == 0)
592 for (sp = &data->ht[h1];
593 (s = rtnl_dereference(*sp)) != NULL;
595 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
596 pinfo && pinfo->protocol == s->protocol &&
597 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
598 #if RSVP_DST_LEN == 4
599 dst[0] == s->dst[0] &&
600 dst[1] == s->dst[1] &&
601 dst[2] == s->dst[2] &&
603 pinfo->tunnelid == s->tunnelid) {
606 /* OK, we found appropriate session */
611 if (f->tunnelhdr == 0)
612 tcf_bind_filter(tp, &f->res, base);
614 tcf_exts_change(&f->exts, &e);
617 for (nfp = rtnl_dereference(*fp); nfp;
618 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
619 __u32 mask = nfp->spi.mask & f->spi.mask;
621 if (mask != f->spi.mask)
624 RCU_INIT_POINTER(f->next, nfp);
625 rcu_assign_pointer(*fp, f);
632 /* No session found. Create new one. */
635 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
638 memcpy(s->dst, dst, sizeof(s->dst));
642 s->protocol = pinfo->protocol;
643 s->tunnelid = pinfo->tunnelid;
646 for (nsp = rtnl_dereference(*sp); nsp;
647 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
648 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
651 RCU_INIT_POINTER(s->next, nsp);
652 rcu_assign_pointer(*sp, s);
657 tcf_exts_destroy(&f->exts);
660 tcf_exts_destroy(&e);
/* Iterate over every filter of tp on behalf of a tcf_walker.
 *
 * All 256 session buckets, every session in them, and all 17 filter slots
 * of each session are visited.  Filters are passed over while arg->count
 * is below arg->skip; arg->fn() is invoked for the others.  The handling
 * of a negative return from arg->fn() and the counter updates are not
 * visible in this extract.
 */
664 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
666 struct rsvp_head *head = rtnl_dereference(tp->root);
672 for (h = 0; h < 256; h++) {
673 struct rsvp_session *s;
675 for (s = rtnl_dereference(head->ht[h]); s;
676 s = rtnl_dereference(s->next)) {
677 for (h1 = 0; h1 <= 16; h1++) {
678 struct rsvp_filter *f;
680 for (f = rtnl_dereference(s->ht[h1]); f;
681 f = rtnl_dereference(f->next)) {
682 if (arg->count < arg->skip) {
686 if (arg->fn(tp, f, arg) < 0) {
/* Serialise filter fh into netlink message skb.
 *
 * t->tcm_handle receives the filter handle.  Inside a TCA_OPTIONS nest
 * the function emits the session's destination address, a PINFO
 * structure built from session and filter fields, the classid when it is
 * non-zero, the source address unless the handle's slot field is 16 (the
 * wildcard slot), and the extensions.  Extension statistics are emitted
 * after the nest is closed.  A failed put jumps to nla_put_failure, where
 * the nest is cancelled.  Return values are not visible in this extract.
 *
 * NOTE(review): pinfo is a stack structure copied out with nla_put(); not
 * all of its assignments are visible here - confirm every field,
 * including any padding, is initialised before the copy.
 */
697 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
698 struct sk_buff *skb, struct tcmsg *t)
700 struct rsvp_filter *f = fh;
701 struct rsvp_session *s;
703 struct tc_rsvp_pinfo pinfo;
709 t->tcm_handle = f->handle;
711 nest = nla_nest_start(skb, TCA_OPTIONS);
713 goto nla_put_failure;
715 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
716 goto nla_put_failure;
719 pinfo.protocol = s->protocol;
720 pinfo.tunnelid = s->tunnelid;
721 pinfo.tunnelhdr = f->tunnelhdr;
723 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
724 goto nla_put_failure;
725 if (f->res.classid &&
726 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
727 goto nla_put_failure;
728 if (((f->handle >> 8) & 0xFF) != 16 &&
729 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
730 goto nla_put_failure;
732 if (tcf_exts_dump(skb, &f->exts) < 0)
733 goto nla_put_failure;
735 nla_nest_end(skb, nest);
737 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
738 goto nla_put_failure;
742 nla_nest_cancel(skb, nest);
/* bind_class callback: acts only when fh is non-NULL and the filter's
 * res.classid equals classid.  The statement guarded by that test
 * (presumably recording cl in the filter's result - confirm) is not
 * visible in this extract.
 */
746 static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl)
748 struct rsvp_filter *f = fh;
750 if (f && f->res.classid == classid)
/* Classifier operations table registered by init_rsvp().  Only some of
 * the members are visible in this extract.  RSVP_OPS is presumably a
 * name supplied by the file that includes this template - confirm.
 */
754 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
756 .classify = rsvp_classify,
758 .destroy = rsvp_destroy,
760 .change = rsvp_change,
761 .delete = rsvp_delete,
764 .bind_class = rsvp_bind_class,
765 .owner = THIS_MODULE,
/* Module init: register the classifier operations and return the result
 * of register_tcf_proto_ops().
 */
768 static int __init init_rsvp(void)
770 return register_tcf_proto_ops(&RSVP_OPS);
/* Module exit: unregister the classifier operations. */
773 static void __exit exit_rsvp(void)
775 unregister_tcf_proto_ops(&RSVP_OPS);
/* Hook init_rsvp()/exit_rsvp() into module load and unload. */
778 module_init(init_rsvp)
779 module_exit(exit_rsvp)