2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #ifdef CONFIG_RFS_ACCEL
35 #include <linux/hash.h>
36 #include <linux/mlx5/fs.h>
38 #include <linux/ipv6.h>
/*
 * Members of the aRFS tuple and rule records.
 * NOTE(review): the enclosing struct headers and several members are not
 * visible in this listing (the embedded numbering skips lines) - confirm
 * the full layout against the complete file.
 */
/* IPv6 source/destination addresses of the flow (copied from flow_keys). */
46 struct in6_addr src_ipv6;
50 struct in6_addr dst_ipv6;
/* Owning device private data; read back by arfs_handle_work(). */
57 struct mlx5e_priv *priv;
/* Deferred work item; set up with arfs_handle_work() as its handler. */
58 struct work_struct arfs_work;
/* Steering rule handle; arfs_handle_work() adds a rule while this is NULL. */
59 struct mlx5_flow_rule *rule;
/* Links the rule into a rules_hash bucket or a temporary deletion list. */
60 struct hlist_node hlist;
62 /* Flow ID passed to ndo_rx_flow_steer */
64 /* Filter ID returned by ndo_rx_flow_steer */
/* Match key of the flow: ethertype, IP protocol, addresses and ports. */
66 struct arfs_tuple tuple;
/*
 * Iterate over every rule of every aRFS table: for each table type i,
 * walk that table's rules_hash with mlx5e_for_each_hash_arfs_rule().
 * Expands to nested for loops, so a `break` in the body leaves only the
 * innermost (per-bucket) loop.
 */
69 #define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \
70 for (i = 0; i < ARFS_NUM_TYPES; i++) \
71 mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j)
/*
 * Iterate over every entry of one rules hash: for each bucket j, walk the
 * bucket with hlist_for_each_entry_safe(), so the current entry may be
 * unlinked by the loop body. `tmp` is the scratch hlist_node cursor.
 */
73 #define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \
74 for (j = 0; j < ARFS_HASH_SIZE; j++) \
75 hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
/*
 * Translate an aRFS table type into an mlx5e traffic type. The visible
 * results cover IPv4/IPv6 combined with TCP/UDP.
 * NOTE(review): the selection logic and any fallback return are not
 * visible in this listing - confirm against the full file.
 */
77 static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
81 return MLX5E_TT_IPV4_TCP;
83 return MLX5E_TT_IPV4_UDP;
85 return MLX5E_TT_IPV6_TCP;
87 return MLX5E_TT_IPV6_UDP;
/*
 * Re-point the ttc rules at TIR destinations taken from priv->indir_tir,
 * one per aRFS type, so traffic no longer goes through the aRFS tables.
 * A failing mlx5_modify_rule_destination() is reported with netdev_err().
 * NOTE(review): the assignment of `tt`, the error check and the return
 * statements are not visible in this listing.
 */
93 static int arfs_disable(struct mlx5e_priv *priv)
95 struct mlx5_flow_destination dest;
96 struct mlx5e_tir *tir = priv->indir_tir;
101 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
102 for (i = 0; i < ARFS_NUM_TYPES; i++) {
/*
 * NOTE(review): tir[] is indexed by the aRFS type `i` here, whereas the
 * rule below is indexed by `tt` and arfs_add_default_rule() indexes tir[]
 * by MLX5E_TT_*. Correct only if both enums share the same ordering -
 * confirm.
 */
103 dest.tir_num = tir[i].tirn;
105 /* Modify ttc rules destination to bypass the aRFS tables */
106 err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
109 netdev_err(priv->netdev,
110 "%s: modify ttc destination failed\n",
/* Forward declaration: arfs_del_rules() is defined further down the file. */
118 static void arfs_del_rules(struct mlx5e_priv *priv);
/*
 * Disable aRFS: first remove all installed aRFS rules, then restore the
 * ttc rule destinations via arfs_disable(), whose result is returned.
 */
120 int mlx5e_arfs_disable(struct mlx5e_priv *priv)
122 arfs_del_rules(priv);
124 return arfs_disable(priv);
/*
 * Enable aRFS: for each aRFS type, change the destination of a ttc rule to
 * the matching aRFS flow table. A failing mlx5_modify_rule_destination()
 * is reported with netdev_err(), including the error code.
 * NOTE(review): the assignment of `tt`, the error handling after the log
 * and the return statements are not visible in this listing.
 */
127 int mlx5e_arfs_enable(struct mlx5e_priv *priv)
129 struct mlx5_flow_destination dest;
134 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
135 for (i = 0; i < ARFS_NUM_TYPES; i++) {
136 dest.ft = priv->fs.arfs.arfs_tables[i].ft.t;
138 /* Modify ttc rules destination to point on the aRFS FTs */
139 err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
142 netdev_err(priv->netdev,
143 "%s: modify ttc destination failed err=%d\n",
/*
 * Tear down one aRFS table: delete its default rule, then destroy the
 * flow table it lived in.
 */
152 static void arfs_destroy_table(struct arfs_table *arfs_t)
154 mlx5_del_flow_rule(arfs_t->default_rule);
155 mlx5e_destroy_flow_table(&arfs_t->ft);
/*
 * Release all aRFS resources of the device: delete the installed rules,
 * destroy the aRFS workqueue and destroy every table whose flow table
 * pointer is valid.
 */
158 void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv)
/*
 * Guard on the NTUPLE hardware feature (the statement it guards is not
 * visible here; presumably an early return - confirm).
 */
162 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
165 arfs_del_rules(priv);
166 destroy_workqueue(priv->fs.arfs.wq);
167 for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Skip tables that were never created or whose creation failed. */
168 if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t))
169 arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]);
/*
 * Install the default rule of the aRFS table selected by `type`. The rule
 * forwards (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) to one of the indirect
 * TIRs in priv->indir_tir, chosen per traffic type. On failure the
 * table's default_rule is reset to NULL and the error is logged.
 * NOTE(review): the remaining parameters, the selection between the four
 * tir_num assignments, the spec release and the return statement are not
 * visible in this listing.
 */
173 static int arfs_add_default_rule(struct mlx5e_priv *priv,
176 struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
177 struct mlx5_flow_destination dest;
178 struct mlx5e_tir *tir = priv->indir_tir;
179 struct mlx5_flow_spec *spec;
/* Zeroed match spec for the rule. */
182 spec = mlx5_vzalloc(sizeof(*spec));
184 netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
189 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
192 dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn;
195 dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn;
198 dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn;
201 dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn;
208 arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec,
209 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
210 MLX5_FS_DEFAULT_FLOW_TAG,
/* Convert the ERR_PTR into an errno and leave no stale pointer behind. */
212 if (IS_ERR(arfs_t->default_rule)) {
213 err = PTR_ERR(arfs_t->default_rule);
214 arfs_t->default_rule = NULL;
215 netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n",
/*
 * Layout of an aRFS flow table: two flow groups. arfs_create_groups()
 * creates the first group with the match criteria and the second one with
 * a cleared input (no criteria). The table size is the sum of both groups.
 */
223 #define MLX5E_ARFS_NUM_GROUPS 2
224 #define MLX5E_ARFS_GROUP1_SIZE BIT(12)
225 #define MLX5E_ARFS_GROUP2_SIZE BIT(0)
226 #define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\
227 MLX5E_ARFS_GROUP2_SIZE)
/*
 * Create the two flow groups of an aRFS flow table.
 *
 * Group 1 (MLX5E_ARFS_GROUP1_SIZE entries) matches on outer headers:
 * ethertype, L4 source/destination ports and IP source/destination
 * addresses. Group 2 (MLX5E_ARFS_GROUP2_SIZE entries) is created from a
 * zeroed input, i.e. without match criteria.
 *
 * NOTE(review): the second parameter, the allocation checks, the
 * per-type selection between the TCP/UDP and IPv4/IPv6 criteria, the
 * num_groups bookkeeping, the cleanup and the return statements are not
 * visible in this listing.
 */
228 static int arfs_create_groups(struct mlx5e_flow_table *ft,
231 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
232 void *outer_headers_c;
/* Group pointer array and the zeroed create_flow_group_in buffer. */
238 ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS,
239 sizeof(*ft->g), GFP_KERNEL);
240 in = mlx5_vzalloc(inlen);
/* Locate the outer-header match criteria inside the input buffer. */
247 mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
248 outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc,
250 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype);
/* L4 port criteria: TCP variant, then UDP variant. */
254 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
255 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
259 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport);
260 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport);
/* IP address criteria: IPv4 variant, then IPv6 variant. */
270 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
271 src_ipv4_src_ipv6.ipv4_layout.ipv4);
272 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
273 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
277 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
278 src_ipv4_src_ipv6.ipv6_layout.ipv6),
280 memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
281 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* Group 1: flow indices [ix, ix + GROUP1_SIZE - 1], outer-header match. */
289 MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
290 MLX5_SET_CFG(in, start_flow_index, ix);
291 ix += MLX5E_ARFS_GROUP1_SIZE;
292 MLX5_SET_CFG(in, end_flow_index, ix - 1);
293 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
294 if (IS_ERR(ft->g[ft->num_groups]))
/* Group 2: reset the input so no criteria are set, then the next indices. */
298 memset(in, 0, inlen);
299 MLX5_SET_CFG(in, start_flow_index, ix);
300 ix += MLX5E_ARFS_GROUP2_SIZE;
301 MLX5_SET_CFG(in, end_flow_index, ix - 1);
302 ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
303 if (IS_ERR(ft->g[ft->num_groups]))
/* Error path: turn the ERR_PTR into an errno and clear the failed slot. */
311 err = PTR_ERR(ft->g[ft->num_groups]);
312 ft->g[ft->num_groups] = NULL;
/*
 * Create one aRFS flow table of MLX5E_ARFS_TABLE_SIZE entries at
 * MLX5E_NIC_PRIO / MLX5E_ARFS_FT_LEVEL, then its flow groups and its
 * default rule. The visible error path destroys the flow table.
 * NOTE(review): the second parameter, the error checks between the steps
 * and the return statements are not visible in this listing.
 */
319 static int arfs_create_table(struct mlx5e_priv *priv,
322 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
323 struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft;
326 ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
327 MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL);
329 err = PTR_ERR(ft->t);
334 err = arfs_create_groups(ft, type);
338 err = arfs_add_default_rule(priv, type);
344 mlx5e_destroy_flow_table(ft);
/*
 * Set up aRFS for the device: initialise the aRFS lock and rule list,
 * create the single-threaded "mlx5e_arfs" workqueue and create one table
 * per aRFS type. The visible failure path calls
 * mlx5e_arfs_destroy_tables() to undo the work.
 * NOTE(review): the statements guarded by the two `if` lines, the error
 * check inside the loop and the return statements are not visible in
 * this listing.
 */
348 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
353 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
356 spin_lock_init(&priv->fs.arfs.arfs_lock);
357 INIT_LIST_HEAD(&priv->fs.arfs.rules);
358 priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs");
359 if (!priv->fs.arfs.wq)
362 for (i = 0; i < ARFS_NUM_TYPES; i++) {
363 err = arfs_create_table(priv, i);
369 mlx5e_arfs_destroy_tables(priv);
/* Bound used by arfs_may_expire_flow() for its per-call `quota` counter. */
373 #define MLX5E_ARFS_EXPIRY_QUOTA 60
/*
 * Expire aRFS rules the stack no longer needs.
 *
 * Under arfs_lock, every rule that has no pending work and for which
 * rps_may_expire_flow() returns true is moved from its hash bucket onto a
 * local deletion list. After the lock is dropped, the flow rules on that
 * list are deleted and the entries unlinked, so mlx5_del_flow_rule() runs
 * outside the spinlock.
 *
 * NOTE(review): the statement guarded by the quota check, any NULL check
 * before mlx5_del_flow_rule() and the freeing of the entries are not
 * visible in this listing.
 */
375 static void arfs_may_expire_flow(struct mlx5e_priv *priv)
377 struct arfs_rule *arfs_rule;
378 struct hlist_node *htmp;
383 HLIST_HEAD(del_list);
384 spin_lock_bh(&priv->fs.arfs.arfs_lock);
385 mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
386 if (!work_pending(&arfs_rule->arfs_work) &&
387 rps_may_expire_flow(priv->netdev,
388 arfs_rule->rxq, arfs_rule->flow_id,
389 arfs_rule->filter_id)) {
390 hlist_del_init(&arfs_rule->hlist);
391 hlist_add_head(&arfs_rule->hlist, &del_list);
/*
 * NOTE(review): the iteration macro expands to nested loops; if the
 * guarded statement is a `break`, it leaves only the current bucket and
 * the scan continues with the next one - confirm the intended bound.
 */
392 if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA)
396 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
397 hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
399 mlx5_del_flow_rule(arfs_rule->rule);
400 hlist_del(&arfs_rule->hlist);
/*
 * Remove every aRFS rule of the device.
 *
 * Under arfs_lock, all rules are moved from the hash buckets onto a local
 * deletion list. With the lock released, each rule's work item is
 * cancelled synchronously, its flow rule is deleted and the entry is
 * unlinked.
 *
 * NOTE(review): any NULL check before mlx5_del_flow_rule() and the
 * freeing of the entries are not visible in this listing.
 */
405 static void arfs_del_rules(struct mlx5e_priv *priv)
407 struct hlist_node *htmp;
408 struct arfs_rule *rule;
412 HLIST_HEAD(del_list);
413 spin_lock_bh(&priv->fs.arfs.arfs_lock);
414 mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) {
415 hlist_del_init(&rule->hlist);
416 hlist_add_head(&rule->hlist, &del_list);
418 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
420 hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
/* The spinlock is released before this synchronous cancel. */
421 cancel_work_sync(&rule->arfs_work);
423 mlx5_del_flow_rule(rule->rule);
424 hlist_del(&rule->hlist);
/*
 * Return the rules_hash bucket of `arfs_t` for a flow, keyed by its L4
 * source and destination ports (the ports are combined into one word and
 * reduced to ARFS_HASH_SHIFT bits with hash_long()).
 * NOTE(review): the destination port is shifted by only 2 bits before
 * being OR-ed with the source port, so the two values overlap in most
 * bits - confirm this is intentional.
 */
429 static struct hlist_head *
430 arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port,
436 l = (__force unsigned long)src_port |
437 ((__force unsigned long)dst_port << 2);
439 bucket_idx = hash_long(l, ARFS_HASH_SHIFT);
441 return &arfs_t->rules_hash[bucket_idx];
/*
 * Select the aRFS table for a flow from its IP protocol and ethertype
 * (network byte order): IPv4/IPv6 combined with TCP/UDP.
 * NOTE(review): the result for any other combination is not visible in
 * this listing; mlx5e_rx_flow_steer() has an -EPROTONOSUPPORT return
 * right after calling this, so presumably NULL - confirm.
 */
444 static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
445 u8 ip_proto, __be16 etype)
447 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP)
448 return &arfs->arfs_tables[ARFS_IPV4_TCP];
449 if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP)
450 return &arfs->arfs_tables[ARFS_IPV4_UDP];
451 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP)
452 return &arfs->arfs_tables[ARFS_IPV6_TCP];
453 if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP)
454 return &arfs->arfs_tables[ARFS_IPV6_UDP];
/*
 * Build and install the steering rule described by `arfs_rule`.
 *
 * The match spec covers the outer headers: ethertype, L4 source and
 * destination ports (TCP or UDP fields depending on the tuple's IP
 * protocol) and the IPv4 or IPv6 source/destination addresses. The rule
 * forwards to the direct TIR of the rule's target queue
 * (priv->direct_tir[arfs_rule->rxq]).
 *
 * Returns the new rule, or ERR_PTR(err) when `err` is non-zero.
 *
 * NOTE(review): the allocation check, the handling of a missing table,
 * the source operands and lengths of the memcpy/memset calls, the IS_ERR
 * check on the new rule and the spec release are not visible in this
 * listing.
 */
459 static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
460 struct arfs_rule *arfs_rule)
462 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
463 struct arfs_tuple *tuple = &arfs_rule->tuple;
464 struct mlx5_flow_rule *rule = NULL;
465 struct mlx5_flow_destination dest;
466 struct arfs_table *arfs_table;
467 struct mlx5_flow_spec *spec;
468 struct mlx5_flow_table *ft;
471 spec = mlx5_vzalloc(sizeof(*spec));
473 netdev_err(priv->netdev, "%s: alloc failed\n", __func__);
/* Always match on the ethertype; the tuple stores it in network order. */
477 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
478 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
479 outer_headers.ethertype);
480 MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype,
481 ntohs(tuple->etype));
482 arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype);
488 ft = arfs_table->ft.t;
/* L4 ports: TCP fields for TCP flows, UDP fields in the other branch. */
489 if (tuple->ip_proto == IPPROTO_TCP) {
490 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
491 outer_headers.tcp_dport);
492 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
493 outer_headers.tcp_sport);
494 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport,
495 ntohs(tuple->dst_port));
496 MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport,
497 ntohs(tuple->src_port));
499 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
500 outer_headers.udp_dport);
501 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
502 outer_headers.udp_sport);
503 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport,
504 ntohs(tuple->dst_port));
505 MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport,
506 ntohs(tuple->src_port));
/* IP addresses: IPv4 layout for ETH_P_IP, IPv6 layout in the other branch. */
508 if (tuple->etype == htons(ETH_P_IP)) {
509 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
510 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4),
513 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
514 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
517 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
518 outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
519 MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
520 outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
522 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
523 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
526 memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value,
527 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
530 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
531 outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6),
534 memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
535 outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
/* Steer matching packets to the direct TIR of the requested RX queue. */
539 dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
540 dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
541 rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
542 MLX5_FS_DEFAULT_FLOW_TAG,
546 netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
547 __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
552 return err ? ERR_PTR(err) : rule;
/*
 * Re-target an existing steering rule to the direct TIR of RX queue
 * `rxq`. The result of mlx5_modify_rule_destination() is not propagated
 * (the function returns void); the visible handling is a netdev_warn().
 * NOTE(review): the condition guarding the warning is not visible in
 * this listing.
 */
555 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
556 struct mlx5_flow_rule *rule, u16 rxq)
558 struct mlx5_flow_destination dst;
561 dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
562 dst.tir_num = priv->direct_tir[rxq].tirn;
563 err = mlx5_modify_rule_destination(rule, &dst);
565 netdev_warn(priv->netdev,
566 "Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
/*
 * Work handler for one aRFS rule (queued by mlx5e_rx_flow_steer()).
 *
 * If the device is not in the MLX5E_STATE_OPENED state, the rule is
 * unlinked from its hash bucket under arfs_lock. Otherwise a rule that
 * has no steering rule yet gets one via arfs_add_rule(), and an existing
 * one is re-targeted with arfs_modify_rule_rq(). arfs_may_expire_flow()
 * is called at the end.
 *
 * NOTE(review): what happens to the unlinked rule, the error check on
 * arfs_add_rule() and the branch/label structure are not visible in this
 * listing.
 */
569 static void arfs_handle_work(struct work_struct *work)
571 struct arfs_rule *arfs_rule = container_of(work,
574 struct mlx5e_priv *priv = arfs_rule->priv;
575 struct mlx5_flow_rule *rule;
/* state_lock is held while the OPENED bit is tested. */
577 mutex_lock(&priv->state_lock);
578 if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
579 spin_lock_bh(&priv->fs.arfs.arfs_lock);
580 hlist_del(&arfs_rule->hlist);
581 spin_unlock_bh(&priv->fs.arfs.arfs_lock);
583 mutex_unlock(&priv->state_lock);
587 mutex_unlock(&priv->state_lock);
/* First run for this rule: install it; later runs only change the queue. */
589 if (!arfs_rule->rule) {
590 rule = arfs_add_rule(priv, arfs_rule);
593 arfs_rule->rule = rule;
595 arfs_modify_rule_rq(priv, arfs_rule->rule,
599 arfs_may_expire_flow(priv);
/*
 * Allocate a new aRFS rule for the flow described by `fk` and add it to
 * the hash of `arfs_t`.
 *
 * The rule is allocated with GFP_ATOMIC, its work item is initialised
 * with arfs_handle_work(), and the tuple (ethertype, IP protocol,
 * addresses, ports) is copied from the dissected flow keys. The filter ID
 * is taken from the running last_filter_id counter, modulo RPS_NO_FILTER.
 *
 * NOTE(review): the allocation check, the assignments of priv/rxq and
 * the return statement are not visible in this listing.
 */
602 static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv,
603 struct arfs_table *arfs_t,
604 const struct flow_keys *fk,
605 u16 rxq, u32 flow_id)
607 struct arfs_rule *rule;
608 struct arfs_tuple *tuple;
610 rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
616 INIT_WORK(&rule->arfs_work, arfs_handle_work);
618 tuple = &rule->tuple;
619 tuple->etype = fk->basic.n_proto;
620 tuple->ip_proto = fk->basic.ip_proto;
/* IPv4 addresses are copied by value; the other branch copies IPv6. */
621 if (tuple->etype == htons(ETH_P_IP)) {
622 tuple->src_ipv4 = fk->addrs.v4addrs.src;
623 tuple->dst_ipv4 = fk->addrs.v4addrs.dst;
625 memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
626 sizeof(struct in6_addr));
627 memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
628 sizeof(struct in6_addr));
630 tuple->src_port = fk->ports.src;
631 tuple->dst_port = fk->ports.dst;
633 rule->flow_id = flow_id;
/* Keep the ID below RPS_NO_FILTER. */
634 rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER;
/* Insert into the bucket selected by the flow's ports. */
636 hlist_add_head(&rule->hlist,
637 arfs_hash_bucket(arfs_t, tuple->src_port,
/*
 * Compare a stored tuple with dissected flow keys: ports, ethertype and,
 * depending on the ethertype, the IPv4 or IPv6 source/destination
 * addresses. The IP protocol is not compared in the visible lines.
 * NOTE(review): the results of the mismatch branches and of an ethertype
 * that is neither IPv4 nor IPv6 are not visible in this listing
 * (presumably false - confirm).
 */
642 static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk)
644 if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst)
646 if (tuple->etype != fk->basic.n_proto)
648 if (tuple->etype == htons(ETH_P_IP))
649 return tuple->src_ipv4 == fk->addrs.v4addrs.src &&
650 tuple->dst_ipv4 == fk->addrs.v4addrs.dst;
651 if (tuple->etype == htons(ETH_P_IPV6))
652 return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src,
653 sizeof(struct in6_addr)) &&
654 !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst,
655 sizeof(struct in6_addr));
/*
 * Look up the rule matching `fk` in `arfs_t`: pick the bucket from the
 * flow's ports and compare each entry's tuple with arfs_cmp().
 * NOTE(review): the return statements are not visible in this listing
 * (presumably the matching rule, or NULL when none matches - confirm).
 */
659 static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t,
660 const struct flow_keys *fk)
662 struct arfs_rule *arfs_rule;
663 struct hlist_head *head;
665 head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst);
666 hlist_for_each_entry(arfs_rule, head, hlist) {
667 if (arfs_cmp(&arfs_rule->tuple, fk))
/*
 * Flow-steering entry point: request that the flow of `skb` be steered
 * to RX queue `rxq_index`.
 *
 * Returns -EPROTONOSUPPORT when the flow cannot be dissected, is neither
 * IPv4 nor IPv6, or is encapsulated; a further -EPROTONOSUPPORT return
 * follows the arfs_get_table() lookup. Otherwise, under arfs_lock, an
 * existing rule already pointing at `rxq_index` just returns its filter
 * ID; an existing rule with a different queue is updated, and a missing
 * rule is allocated. The rule's work item is then queued on the aRFS
 * workqueue and the filter ID is returned.
 *
 * NOTE(review): the flow_keys declaration, the branch structure around
 * the lookup and the allocation-failure return value are not visible in
 * this listing.
 */
674 int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
675 u16 rxq_index, u32 flow_id)
677 struct mlx5e_priv *priv = netdev_priv(dev);
678 struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
679 struct arfs_table *arfs_t;
680 struct arfs_rule *arfs_rule;
683 if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
684 return -EPROTONOSUPPORT;
686 if (fk.basic.n_proto != htons(ETH_P_IP) &&
687 fk.basic.n_proto != htons(ETH_P_IPV6))
688 return -EPROTONOSUPPORT;
690 if (skb->encapsulation)
691 return -EPROTONOSUPPORT;
693 arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto);
695 return -EPROTONOSUPPORT;
697 spin_lock_bh(&arfs->arfs_lock);
698 arfs_rule = arfs_find_rule(arfs_t, &fk);
/* Already steered to the requested queue: nothing to do. */
700 if (arfs_rule->rxq == rxq_index) {
701 spin_unlock_bh(&arfs->arfs_lock);
702 return arfs_rule->filter_id;
704 arfs_rule->rxq = rxq_index;
706 arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id);
708 spin_unlock_bh(&arfs->arfs_lock);
/* The hardware update is deferred to arfs_handle_work(). */
712 queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work);
713 spin_unlock_bh(&arfs->arfs_lock);
/*
 * NOTE(review): filter_id is read after arfs_lock is released and after
 * the work was queued; confirm the rule cannot be released in between.
 */
714 return arfs_rule->filter_id;