1 // SPDX-License-Identifier: GPL-2.0-only
3 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
5 * Development of this code funded by Astaro AG (http://www.astaro.com/)
8 #include <linux/kernel.h>
9 #include <linux/init.h>
10 #include <linux/module.h>
11 #include <linux/list.h>
12 #include <linux/log2.h>
13 #include <linux/jhash.h>
14 #include <linux/netlink.h>
15 #include <linux/workqueue.h>
16 #include <linux/rhashtable.h>
17 #include <linux/netfilter.h>
18 #include <linux/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_tables_core.h>
20 #include <net/netns/generic.h>
/*
 * Identifier declared outside this file; nft_rhash_gc() passes it to
 * net_generic() to obtain the struct nftables_pernet of the set's netns.
 */
22 extern unsigned int nf_tables_net_id;
24 /* We target a hash table size of 4, element hint is 75% of final size */
/* nft_rhash_init() uses this as nelem_hint when desc->size is zero. */
25 #define NFT_RHASH_ELEMENT_HINT 3
/*
 * Delayed work item of the rhashtable-backed set: nft_rhash_init() binds it
 * to nft_rhash_gc(), nft_rhash_gc_init() queues it and nft_rhash_destroy()
 * cancels it.
 */
29 struct delayed_work gc_work;
/*
 * One element stored in the rhashtable-backed set.
 */
32 struct nft_rhash_elem {
/* Hash table linkage; its offset is the head_offset in nft_rhash_params. */
33 struct rhash_head node;
/*
 * Set extension area (key is read via nft_set_ext_key()); its offset is
 * published as .elemsize in nft_set_rhash_type.
 */
34 struct nft_set_ext ext;
/*
 * Lookup argument handed to the rhashtable callbacks nft_rhash_key() and
 * nft_rhash_cmp(). Callers in this file also initialise .key and .genmask
 * members.
 */
37 struct nft_rhash_cmp_arg {
/* Set being searched; nft_rhash_cmp() reads set->klen for the key compare. */
38 const struct nft_set *set;
/*
 * rhashtable .hashfn callback: @data is a struct nft_rhash_cmp_arg and the
 * hash is jhash() over @len bytes of its key, seeded with @seed.
 */
43 static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
45 const struct nft_rhash_cmp_arg *arg = data;
47 return jhash(arg->key, len, seed);
/*
 * rhashtable .obj_hashfn callback: @data is a stored struct nft_rhash_elem
 * and the hash is jhash() over @len bytes of the key held in its extension
 * area, seeded with @seed.
 */
50 static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed)
52 const struct nft_rhash_elem *he = data;
54 return jhash(nft_set_ext_key(&he->ext), len, seed);
/*
 * rhashtable .obj_cmpfn callback comparing a stored element against the
 * lookup argument in arg->key (a struct nft_rhash_cmp_arg).
 *
 * The candidate is tested for: key bytes differing over set->klen, being
 * marked dead, being expired, and not being active in the requested genmask.
 * NOTE(review): the statement following each test is not visible here;
 * presumably each one rejects the candidate - confirm.
 */
57 static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
60 const struct nft_rhash_cmp_arg *x = arg->key;
61 const struct nft_rhash_elem *he = ptr;
63 if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
65 if (nft_set_elem_is_dead(&he->ext))
67 if (nft_set_elem_expired(&he->ext))
69 if (!nft_set_elem_active(&he->ext, x->genmask))
/*
 * Template rhashtable parameters shared by every lookup/insert/remove call
 * in this file. nft_rhash_init() copies it and fills in nelem_hint and
 * key_len before calling rhashtable_init().
 */
74 static const struct rhashtable_params nft_rhash_params = {
75 .head_offset = offsetof(struct nft_rhash_elem, node),
/* Hash of a lookup argument vs. hash of a stored element. */
76 .hashfn = nft_rhash_key,
77 .obj_hashfn = nft_rhash_obj,
78 .obj_cmpfn = nft_rhash_cmp,
79 .automatic_shrinking = true,
/*
 * Look up @key in the rhashtable-backed @set, matching only elements active
 * in the current generation of @net (nft_genmask_cur()).
 *
 * NOTE(review): how the result is reported through @ext and the return value
 * is not visible in these lines - confirm against the full function.
 */
82 static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
83 const u32 *key, const struct nft_set_ext **ext)
85 struct nft_rhash *priv = nft_set_priv(set);
86 const struct nft_rhash_elem *he;
87 struct nft_rhash_cmp_arg arg = {
88 .genmask = nft_genmask_cur(net),
93 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
/*
 * Fetch the stored element whose key equals elem->key.val.data, considering
 * only elements active in the current generation of @net.
 *
 * The visible failure return is ERR_PTR(-ENOENT).
 * NOTE(review): the success return is not visible here; presumably the
 * matching element - confirm.
 */
100 static void *nft_rhash_get(const struct net *net, const struct nft_set *set,
101 const struct nft_set_elem *elem, unsigned int flags)
103 struct nft_rhash *priv = nft_set_priv(set);
104 struct nft_rhash_elem *he;
105 struct nft_rhash_cmp_arg arg = {
106 .genmask = nft_genmask_cur(net),
108 .key = elem->key.val.data,
111 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
115 return ERR_PTR(-ENOENT);
/*
 * Look up an element and, via the caller-supplied @new constructor, create
 * and insert one into the rhashtable.
 *
 * The lookup uses NFT_GENMASK_ANY rather than a specific generation. A newly
 * built element is inserted with rhashtable_lookup_get_insert_key(); two
 * visible paths undo the creation by destroying the element and decrementing
 * set->nelems.
 * NOTE(review): the conditions guarding each step and the return values are
 * not visible in these lines - confirm against the full function.
 */
118 static bool nft_rhash_update(struct nft_set *set, const u32 *key,
119 void *(*new)(struct nft_set *,
120 const struct nft_expr *,
121 struct nft_regs *regs),
122 const struct nft_expr *expr,
123 struct nft_regs *regs,
124 const struct nft_set_ext **ext)
126 struct nft_rhash *priv = nft_set_priv(set);
127 struct nft_rhash_elem *he, *prev;
128 struct nft_rhash_cmp_arg arg = {
129 .genmask = NFT_GENMASK_ANY,
134 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
/* Build a fresh element through the callback supplied by the caller. */
138 he = new(set, expr, regs);
142 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
147 /* Another cpu may race to insert the element with the same key */
/* Drop our element and give back its slot in the element counter. */
149 nft_set_elem_destroy(set, he, true);
150 atomic_dec(&set->nelems);
/* Second undo path with the same destroy + counter decrement. */
159 nft_set_elem_destroy(set, he, true);
160 atomic_dec(&set->nelems);
/*
 * Insert the element in elem->priv into the rhashtable, keyed by
 * elem->key.val.data. Duplicate detection uses the next generation mask
 * (nft_genmask_next()).
 *
 * One visible return propagates PTR_ERR(prev) from
 * rhashtable_lookup_get_insert_key().
 * NOTE(review): the other return paths and the use of @ext are not visible
 * here - confirm.
 */
165 static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
166 const struct nft_set_elem *elem,
167 struct nft_set_ext **ext)
169 struct nft_rhash *priv = nft_set_priv(set);
170 struct nft_rhash_elem *he = elem->priv;
171 struct nft_rhash_cmp_arg arg = {
172 .genmask = nft_genmask_next(net),
174 .key = elem->key.val.data,
176 struct nft_rhash_elem *prev;
178 prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
181 return PTR_ERR(prev);
/*
 * .activate callback of the rhash backend: changes the active state of the
 * element in elem->priv via nft_set_elem_change_active().
 */
189 static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
190 const struct nft_set_elem *elem)
192 struct nft_rhash_elem *he = elem->priv;
194 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .flush callback of the rhash backend: @priv is the element itself (a
 * struct nft_rhash_elem), whose active state is changed via
 * nft_set_elem_change_active().
 * NOTE(review): the return statement is not visible in these lines.
 */
197 static bool nft_rhash_flush(const struct net *net,
198 const struct nft_set *set, void *priv)
200 struct nft_rhash_elem *he = priv;
202 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .deactivate callback of the rhash backend: looks up the stored element
 * matching elem->key.val.data under the next generation mask and changes its
 * active state.
 * NOTE(review): the guard around the state change and the return value are
 * not visible here - confirm.
 */
207 static void *nft_rhash_deactivate(const struct net *net,
208 const struct nft_set *set,
209 const struct nft_set_elem *elem)
211 struct nft_rhash *priv = nft_set_priv(set);
212 struct nft_rhash_elem *he;
213 struct nft_rhash_cmp_arg arg = {
214 .genmask = nft_genmask_next(net),
216 .key = elem->key.val.data,
220 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
222 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .remove callback of the rhash backend: unlinks the element in elem->priv
 * from the set's rhashtable with rhashtable_remove_fast().
 */
229 static void nft_rhash_remove(const struct net *net,
230 const struct nft_set *set,
231 const struct nft_set_elem *elem)
233 struct nft_rhash *priv = nft_set_priv(set);
234 struct nft_rhash_elem *he = elem->priv;
236 rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
/*
 * .delete callback of the rhash backend: looks the element up with
 * NFT_GENMASK_ANY and marks it dead via nft_set_elem_dead().
 * NOTE(review): the remaining parameters, the not-found handling and the
 * return values are not visible in these lines - confirm.
 */
239 static bool nft_rhash_delete(const struct nft_set *set,
242 struct nft_rhash *priv = nft_set_priv(set);
243 struct nft_rhash_cmp_arg arg = {
244 .genmask = NFT_GENMASK_ANY,
248 struct nft_rhash_elem *he;
250 he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
254 nft_set_elem_dead(&he->ext);
/*
 * .walk callback of the rhash backend: iterates the rhashtable with the
 * rhashtable_walk_* API and invokes iter->fn on elements, storing its result
 * in iter->err.
 *
 * Walker errors other than -EAGAIN are recorded in iter->err. Elements are
 * tested against iter->skip and iter->genmask before the callback.
 * NOTE(review): the statements following those tests (and how @elem is filled
 * in) are not visible here; presumably such elements are passed over -
 * confirm.
 */
259 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
260 struct nft_set_iter *iter)
262 struct nft_rhash *priv = nft_set_priv(set);
263 struct nft_rhash_elem *he;
264 struct rhashtable_iter hti;
265 struct nft_set_elem elem;
267 rhashtable_walk_enter(&priv->ht, &hti);
268 rhashtable_walk_start(&hti);
270 while ((he = rhashtable_walk_next(&hti))) {
272 if (PTR_ERR(he) != -EAGAIN) {
273 iter->err = PTR_ERR(he);
280 if (iter->count < iter->skip)
282 if (!nft_set_elem_active(&he->ext, iter->genmask))
287 iter->err = iter->fn(ctx, set, iter, &elem);
/* Every walk_enter/walk_start is paired with walk_stop/walk_exit. */
294 rhashtable_walk_stop(&hti);
295 rhashtable_walk_exit(&hti);
/*
 * Work handler bound to priv->gc_work: walks the set's rhashtable, marks
 * elements dead, adds them to a GC transaction and finally re-queues itself
 * after nft_set_gc_interval().
 *
 * The walk consults the dead flag, an optional per-expression ->gc hook and
 * the expiry state of each element.
 * NOTE(review): the jump/continue statements tying these tests together are
 * not visible in these lines - confirm the exact selection logic against the
 * full function.
 */
298 static void nft_rhash_gc(struct work_struct *work)
300 struct nftables_pernet *nft_net;
302 struct nft_rhash_elem *he;
303 struct nft_rhash *priv;
304 struct rhashtable_iter hti;
305 struct nft_trans_gc *gc;
/* Recover the set and its network namespace from the work item. */
309 priv = container_of(work, struct nft_rhash, gc_work.work);
310 set = nft_set_container_of(priv);
311 net = read_pnet(&set->net);
312 nft_net = net_generic(net, nf_tables_net_id);
/* Snapshot of the GC sequence; compared again for every walked element. */
313 gc_seq = READ_ONCE(nft_net->gc_seq);
315 if (nft_set_gc_is_pending(set))
318 gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
322 rhashtable_walk_enter(&priv->ht, &hti);
323 rhashtable_walk_start(&hti);
325 while ((he = rhashtable_walk_next(&hti))) {
327 nft_trans_gc_destroy(gc);
332 /* Ruleset has been updated, try later. */
333 if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
334 nft_trans_gc_destroy(gc);
339 if (nft_set_elem_is_dead(&he->ext))
342 if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
343 struct nft_expr *expr = nft_set_ext_expr(&he->ext);
346 expr->ops->gc(read_pnet(&set->net), expr))
350 if (!nft_set_elem_expired(&he->ext))
353 nft_set_elem_dead(&he->ext);
/* Inside the walk the transaction is extended with GFP_ATOMIC. */
355 gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
359 nft_trans_gc_elem_add(gc, he);
363 rhashtable_walk_stop(&hti);
364 rhashtable_walk_exit(&hti);
367 nft_trans_gc_queue_async_done(gc);
/* Re-arm the worker for the next GC interval. */
369 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
370 nft_set_gc_interval(set));
/*
 * .privsize callback of the rhash backend: the private area is a fixed
 * sizeof(struct nft_rhash); neither @nla nor @desc is consulted.
 */
373 static u64 nft_rhash_privsize(const struct nlattr * const nla[],
374 const struct nft_set_desc *desc)
376 return sizeof(struct nft_rhash);
/*
 * Queue the set's garbage-collection work on system_power_efficient_wq with
 * a delay of nft_set_gc_interval(set). Also installed as .gc_init in
 * nft_set_rhash_type.
 */
379 static void nft_rhash_gc_init(const struct nft_set *set)
381 struct nft_rhash *priv = nft_set_priv(set);
383 queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
384 nft_set_gc_interval(set));
/*
 * .init callback of the rhash backend: initialises the set's rhashtable and
 * its garbage-collection work item.
 *
 * A local copy of nft_rhash_params is completed with the element hint
 * (desc->size, or NFT_RHASH_ELEMENT_HINT when that is zero) and the set's key
 * length before being passed to rhashtable_init(). The GC work is started
 * right away only for sets flagged NFT_SET_TIMEOUT or NFT_SET_EVAL.
 * NOTE(review): the error check after rhashtable_init() and the return
 * statements are not visible in these lines.
 */
387 static int nft_rhash_init(const struct nft_set *set,
388 const struct nft_set_desc *desc,
389 const struct nlattr * const tb[])
391 struct nft_rhash *priv = nft_set_priv(set);
392 struct rhashtable_params params = nft_rhash_params;
395 params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT;
396 params.key_len = set->klen;
/* Pass the address of the local copy (was a mis-encoded "&params"). */
398 err = rhashtable_init(&priv->ht, &params);
402 INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
403 if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
404 nft_rhash_gc_init(set);
/*
 * Argument bundle passed to nft_rhash_elem_destroy() when the rhashtable is
 * torn down: the nft context and the set owning the elements.
 */
409 struct nft_rhash_ctx {
410 const struct nft_ctx ctx;
411 const struct nft_set *set;
/*
 * Per-element callback given to rhashtable_free_and_destroy(): @ptr is the
 * element, @arg a struct nft_rhash_ctx supplying the context and set for
 * nf_tables_set_elem_destroy().
 */
414 static void nft_rhash_elem_destroy(void *ptr, void *arg)
416 struct nft_rhash_ctx *rhash_ctx = arg;
418 nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
/*
 * .destroy callback of the rhash backend: stops the GC worker, then frees
 * the rhashtable, destroying every element via nft_rhash_elem_destroy().
 */
421 static void nft_rhash_destroy(const struct nft_ctx *ctx,
422 const struct nft_set *set)
424 struct nft_rhash *priv = nft_set_priv(set);
425 struct nft_rhash_ctx rhash_ctx = {
/* Cancel the GC work (and wait for it) before the table is freed. */
430 cancel_delayed_work_sync(&priv->gc_work);
431 rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
435 /* Number of buckets is stored in u32, so cap our result to 1U<<31 */
436 #define NFT_MAX_BUCKETS (1U << 31)
/*
 * Derive the bucket count of the fixed-size hash from the requested element
 * count @size: 4/3 of @size, rounded up to a power of two and capped at
 * NFT_MAX_BUCKETS.
 */
438 static u32 nft_hash_buckets(u32 size)
/* Widen to 64 bits before multiplying so size * 4 cannot wrap. */
440 u64 val = div_u64((u64)size * 4, 3);
442 if (val >= NFT_MAX_BUCKETS)
443 return NFT_MAX_BUCKETS;
445 return roundup_pow_of_two(val);
/*
 * .estimate callback of the rhash backend: reports O(1) lookup and O(n)
 * space classes in @est.
 * NOTE(review): any est->size assignment and the return value are not
 * visible in these lines.
 */
448 static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
449 struct nft_set_estimate *est)
452 est->lookup = NFT_SET_CLASS_O_1;
453 est->space = NFT_SET_CLASS_O_N;
/*
 * Bucket array of the fixed-size hash, declared as a flexible array member;
 * nft_hash_privsize() sizes it as nft_hash_buckets(desc->size) list heads
 * and the callbacks index it with a hash scaled to priv->buckets.
 */
461 struct hlist_head table[];
/*
 * One element stored in the fixed-size hash set.
 */
464 struct nft_hash_elem {
/* Linkage into one bucket list of priv->table[]. */
465 struct hlist_node node;
/*
 * Set extension area (key is read via nft_set_ext_key()); its offset is
 * published as .elemsize in the hash set types.
 */
466 struct nft_set_ext ext;
/*
 * .lookup callback of the generic fixed-size hash: hashes @key over
 * set->klen bytes with the per-set seed, scales the hash to the bucket count
 * and scans that bucket for an element with equal key bytes that is active
 * in the current generation.
 * NOTE(review): the handling of a match (use of @ext, return values) is not
 * visible in these lines.
 */
469 static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
470 const u32 *key, const struct nft_set_ext **ext)
472 struct nft_hash *priv = nft_set_priv(set);
473 u8 genmask = nft_genmask_cur(net);
474 const struct nft_hash_elem *he;
477 hash = jhash(key, set->klen, priv->seed);
/* Map the 32-bit hash onto [0, priv->buckets). */
478 hash = reciprocal_scale(hash, priv->buckets);
479 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
480 if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) &&
481 nft_set_elem_active(&he->ext, genmask)) {
/*
 * .get-style helper of the fixed-size hash: hashes elem->key.val.data over
 * set->klen bytes, scans the matching bucket for an element with equal key
 * bytes that is active in the current generation.
 *
 * The visible failure return is ERR_PTR(-ENOENT).
 * NOTE(review): the success return is not visible here; presumably the
 * matching element - confirm.
 */
489 static void *nft_hash_get(const struct net *net, const struct nft_set *set,
490 const struct nft_set_elem *elem, unsigned int flags)
492 struct nft_hash *priv = nft_set_priv(set);
493 u8 genmask = nft_genmask_cur(net);
494 struct nft_hash_elem *he;
497 hash = jhash(elem->key.val.data, set->klen, priv->seed);
498 hash = reciprocal_scale(hash, priv->buckets);
499 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
500 if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) &&
501 nft_set_elem_active(&he->ext, genmask))
504 return ERR_PTR(-ENOENT);
/*
 * .lookup callback of the "fast" hash type: hashes a single 32-bit word
 * (k1) with jhash_1word() instead of the byte-wise jhash(), then scans the
 * bucket reading each stored key as one u32 (k2).
 * NOTE(review): the assignment of k1, the k1/k2 comparison and the result
 * handling are not visible in these lines; presumably k1 is the first word
 * of @key - confirm.
 */
507 static bool nft_hash_lookup_fast(const struct net *net,
508 const struct nft_set *set,
509 const u32 *key, const struct nft_set_ext **ext)
511 struct nft_hash *priv = nft_set_priv(set);
512 u8 genmask = nft_genmask_cur(net);
513 const struct nft_hash_elem *he;
517 hash = jhash_1word(k1, priv->seed);
518 hash = reciprocal_scale(hash, priv->buckets);
519 hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
520 k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
522 nft_set_elem_active(&he->ext, genmask)) {
/*
 * Compute the bucket index for the key stored in @ext. Two hash forms
 * appear: jhash_1word() under the set->klen == 4 test and jhash() over
 * set->klen bytes; the result is scaled to priv->buckets.
 * NOTE(review): the branch structure between the two hash calls and the
 * assignment of k1 are not visible here; presumably the byte-wise jhash() is
 * the else branch - confirm.
 */
530 static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
531 const struct nft_set_ext *ext)
533 const struct nft_data *key = nft_set_ext_key(ext);
536 if (set->klen == 4) {
538 hash = jhash_1word(k1, priv->seed);
540 hash = jhash(key, set->klen, priv->seed);
542 hash = reciprocal_scale(hash, priv->buckets);
/*
 * .insert callback of the fixed-size hash: computes the bucket of the new
 * element (elem->priv) with nft_jhash(), scans the bucket for an element
 * with the same key that is active in the next generation, and links the new
 * element at the head of the bucket.
 * NOTE(review): what happens when such a duplicate is found (use of @ext,
 * return value) is not visible in these lines.
 */
547 static int nft_hash_insert(const struct net *net, const struct nft_set *set,
548 const struct nft_set_elem *elem,
549 struct nft_set_ext **ext)
551 struct nft_hash_elem *this = elem->priv, *he;
552 struct nft_hash *priv = nft_set_priv(set);
553 u8 genmask = nft_genmask_next(net);
556 hash = nft_jhash(set, priv, &this->ext);
557 hlist_for_each_entry(he, &priv->table[hash], node) {
558 if (!memcmp(nft_set_ext_key(&this->ext),
559 nft_set_ext_key(&he->ext), set->klen) &&
560 nft_set_elem_active(&he->ext, genmask)) {
565 hlist_add_head_rcu(&this->node, &priv->table[hash]);
/*
 * .activate callback of the fixed-size hash: changes the active state of the
 * element in elem->priv via nft_set_elem_change_active().
 */
569 static void nft_hash_activate(const struct net *net, const struct nft_set *set,
570 const struct nft_set_elem *elem)
572 struct nft_hash_elem *he = elem->priv;
574 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .flush callback of the fixed-size hash: @priv is the element itself (a
 * struct nft_hash_elem), whose active state is changed via
 * nft_set_elem_change_active().
 * NOTE(review): the return statement is not visible in these lines.
 */
577 static bool nft_hash_flush(const struct net *net,
578 const struct nft_set *set, void *priv)
580 struct nft_hash_elem *he = priv;
582 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .deactivate callback of the fixed-size hash: locates the bucket from the
 * key of elem->priv, then scans it for an element whose key equals
 * elem->key.val and that is active in the next generation; that element's
 * active state is changed.
 * NOTE(review): the compare length argument and the return values are not
 * visible in these lines.
 */
586 static void *nft_hash_deactivate(const struct net *net,
587 const struct nft_set *set,
588 const struct nft_set_elem *elem)
590 struct nft_hash *priv = nft_set_priv(set);
591 struct nft_hash_elem *this = elem->priv, *he;
592 u8 genmask = nft_genmask_next(net);
595 hash = nft_jhash(set, priv, &this->ext);
596 hlist_for_each_entry(he, &priv->table[hash], node) {
597 if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
599 nft_set_elem_active(&he->ext, genmask)) {
600 nft_set_elem_change_active(net, set, &he->ext);
/*
 * .remove callback of the fixed-size hash: unlinks the element in elem->priv
 * from its bucket list with hlist_del_rcu().
 */
607 static void nft_hash_remove(const struct net *net,
608 const struct nft_set *set,
609 const struct nft_set_elem *elem)
611 struct nft_hash_elem *he = elem->priv;
613 hlist_del_rcu(&he->node);
/*
 * .walk callback of the fixed-size hash: visits every bucket in index order
 * and every element within it, invoking iter->fn and storing its result in
 * iter->err.
 *
 * Elements are tested against iter->skip and iter->genmask before the
 * callback.
 * NOTE(review): the statements following those tests, the filling of @elem
 * and the error handling after the callback are not visible here - confirm.
 */
616 static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
617 struct nft_set_iter *iter)
619 struct nft_hash *priv = nft_set_priv(set);
620 struct nft_hash_elem *he;
621 struct nft_set_elem elem;
624 for (i = 0; i < priv->buckets; i++) {
625 hlist_for_each_entry_rcu(he, &priv->table[i], node) {
626 if (iter->count < iter->skip)
628 if (!nft_set_elem_active(&he->ext, iter->genmask))
633 iter->err = iter->fn(ctx, set, iter, &elem);
/*
 * .privsize callback of the fixed-size hash: the private area is struct
 * nft_hash followed by one hlist_head per bucket, with the bucket count
 * taken from nft_hash_buckets(desc->size). @nla is not consulted.
 */
642 static u64 nft_hash_privsize(const struct nlattr * const nla[],
643 const struct nft_set_desc *desc)
645 return sizeof(struct nft_hash) +
/* Cast to u64 so the bucket-array size is computed in 64 bits. */
646 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
/*
 * .init callback of the fixed-size hash: records the bucket count derived
 * from desc->size and draws a random hash seed for this set. @tb is not used
 * in the visible lines.
 * NOTE(review): the return statement is not visible in these lines.
 */
649 static int nft_hash_init(const struct nft_set *set,
650 const struct nft_set_desc *desc,
651 const struct nlattr * const tb[])
653 struct nft_hash *priv = nft_set_priv(set);
/* Same formula nft_hash_privsize() used to size priv->table[]. */
655 priv->buckets = nft_hash_buckets(desc->size);
656 get_random_bytes(&priv->seed, sizeof(priv->seed));
/*
 * .destroy callback of the fixed-size hash: walks every bucket, unlinking
 * each element and releasing it with nf_tables_set_elem_destroy().
 */
661 static void nft_hash_destroy(const struct nft_ctx *ctx,
662 const struct nft_set *set)
664 struct nft_hash *priv = nft_set_priv(set);
665 struct nft_hash_elem *he;
666 struct hlist_node *next;
669 for (i = 0; i < priv->buckets; i++) {
/* The _safe iterator is needed because entries are removed in the loop. */
670 hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
671 hlist_del_rcu(&he->node);
672 nf_tables_set_elem_destroy(ctx, set, he);
/*
 * .estimate callback of the generic fixed-size hash: fills @est with the
 * memory footprint (private header, bucket array and desc->size elements)
 * and the O(1) lookup / O(n) space classes.
 * NOTE(review): the precondition checks ahead of the size computation and
 * the return values are not visible in these lines.
 */
677 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
678 struct nft_set_estimate *est)
686 est->size = sizeof(struct nft_hash) +
687 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
688 (u64)desc->size * sizeof(struct nft_hash_elem);
689 est->lookup = NFT_SET_CLASS_O_1;
690 est->space = NFT_SET_CLASS_O_N;
/*
 * .estimate callback of the "fast" fixed-size hash: fills @est with the
 * memory footprint (private header, bucket array and desc->size elements)
 * and the O(1) lookup / O(n) space classes. The visible computation matches
 * nft_hash_estimate().
 * NOTE(review): the precondition checks ahead of the size computation and
 * the return values are not visible in these lines.
 */
695 static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
696 struct nft_set_estimate *est)
704 est->size = sizeof(struct nft_hash) +
705 (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
706 (u64)desc->size * sizeof(struct nft_hash_elem);
707 est->lookup = NFT_SET_CLASS_O_1;
708 est->space = NFT_SET_CLASS_O_N;
/*
 * Set backend descriptor for the rhashtable implementation. It advertises
 * map, object, timeout and eval support and wires up the nft_rhash_*
 * callbacks, including the update, delete, get and gc_init hooks that the
 * fixed-size hash types do not list.
 */
713 const struct nft_set_type nft_set_rhash_type = {
714 .features = NFT_SET_MAP | NFT_SET_OBJECT |
715 NFT_SET_TIMEOUT | NFT_SET_EVAL,
717 .privsize = nft_rhash_privsize,
/* Bytes preceding the extension area inside each element. */
718 .elemsize = offsetof(struct nft_rhash_elem, ext),
719 .estimate = nft_rhash_estimate,
720 .init = nft_rhash_init,
721 .gc_init = nft_rhash_gc_init,
722 .destroy = nft_rhash_destroy,
723 .insert = nft_rhash_insert,
724 .activate = nft_rhash_activate,
725 .deactivate = nft_rhash_deactivate,
726 .flush = nft_rhash_flush,
727 .remove = nft_rhash_remove,
728 .lookup = nft_rhash_lookup,
729 .update = nft_rhash_update,
730 .delete = nft_rhash_delete,
731 .walk = nft_rhash_walk,
732 .get = nft_rhash_get,
/*
 * Set backend descriptor for the generic fixed-size hash. It advertises map
 * and object support only and uses the byte-wise nft_hash_lookup() together
 * with nft_hash_estimate().
 */
736 const struct nft_set_type nft_set_hash_type = {
737 .features = NFT_SET_MAP | NFT_SET_OBJECT,
739 .privsize = nft_hash_privsize,
/* Bytes preceding the extension area inside each element. */
740 .elemsize = offsetof(struct nft_hash_elem, ext),
741 .estimate = nft_hash_estimate,
742 .init = nft_hash_init,
743 .destroy = nft_hash_destroy,
744 .insert = nft_hash_insert,
745 .activate = nft_hash_activate,
746 .deactivate = nft_hash_deactivate,
747 .flush = nft_hash_flush,
748 .remove = nft_hash_remove,
749 .lookup = nft_hash_lookup,
750 .walk = nft_hash_walk,
/*
 * Set backend descriptor for the "fast" fixed-size hash. It shares every
 * callback with nft_set_hash_type except .estimate
 * (nft_hash_fast_estimate) and .lookup (nft_hash_lookup_fast, which hashes a
 * single 32-bit word).
 */
755 const struct nft_set_type nft_set_hash_fast_type = {
756 .features = NFT_SET_MAP | NFT_SET_OBJECT,
758 .privsize = nft_hash_privsize,
/* Bytes preceding the extension area inside each element. */
759 .elemsize = offsetof(struct nft_hash_elem, ext),
760 .estimate = nft_hash_fast_estimate,
761 .init = nft_hash_init,
762 .destroy = nft_hash_destroy,
763 .insert = nft_hash_insert,
764 .activate = nft_hash_activate,
765 .deactivate = nft_hash_deactivate,
766 .flush = nft_hash_flush,
767 .remove = nft_hash_remove,
768 .lookup = nft_hash_lookup_fast,
769 .walk = nft_hash_walk,