1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <net/netevent.h>
19 #include <net/neighbour.h>
21 #include <net/ip_fib.h>
22 #include <net/ip6_fib.h>
23 #include <net/fib_rules.h>
24 #include <net/ip_tunnels.h>
25 #include <net/l3mdev.h>
26 #include <net/addrconf.h>
27 #include <net/ndisc.h>
29 #include <net/fib_notifier.h>
30 #include <net/switchdev.h>
35 #include "spectrum_cnt.h"
36 #include "spectrum_dpipe.h"
37 #include "spectrum_ipip.h"
38 #include "spectrum_mr.h"
39 #include "spectrum_mr_tcam.h"
40 #include "spectrum_router.h"
41 #include "spectrum_span.h"
45 struct mlxsw_sp_lpm_tree;
46 struct mlxsw_sp_rif_ops;
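/* Per-ASIC router state: the RIF and virtual router arrays, the neighbour and
 * nexthop hash tables, the LPM trees, and the notifier blocks and delayed
 * works that keep them in sync with the kernel.
 */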
48 struct mlxsw_sp_router {
49 struct mlxsw_sp *mlxsw_sp;
50 struct mlxsw_sp_rif **rifs;
51 struct mlxsw_sp_vr *vrs;
52 struct rhashtable neigh_ht;
53 struct rhashtable nexthop_group_ht;
54 struct rhashtable nexthop_ht;
55 struct list_head nexthop_list;
57 /* One tree for each protocol: IPv4 and IPv6 */
58 struct mlxsw_sp_lpm_tree *proto_trees[2];
59 struct mlxsw_sp_lpm_tree *trees;
60 unsigned int tree_count;
63 struct delayed_work dw;
64 unsigned long interval; /* ms */
66 struct delayed_work nexthop_probe_dw;
67 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
68 struct list_head nexthop_neighs_list;
69 struct list_head ipip_list;
71 struct notifier_block fib_nb;
72 struct notifier_block netevent_nb;
73 const struct mlxsw_sp_rif_ops **rif_ops_arr;
74 const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
struct mlxsw_sp_rif {
78 struct list_head nexthop_list;
79 struct list_head neigh_list;
80 struct net_device *dev;
81 struct mlxsw_sp_fid *fid;
82 unsigned char addr[ETH_ALEN];
86 const struct mlxsw_sp_rif_ops *ops;
87 struct mlxsw_sp *mlxsw_sp;
89 unsigned int counter_ingress;
90 bool counter_ingress_valid;
91 unsigned int counter_egress;
92 bool counter_egress_valid;
95 struct mlxsw_sp_rif_params {
96 struct net_device *dev;
105 struct mlxsw_sp_rif_subport {
106 struct mlxsw_sp_rif common;
115 struct mlxsw_sp_rif_ipip_lb {
116 struct mlxsw_sp_rif common;
117 struct mlxsw_sp_rif_ipip_lb_config lb_config;
118 u16 ul_vr_id; /* Reserved for Spectrum-2. */
121 struct mlxsw_sp_rif_params_ipip_lb {
122 struct mlxsw_sp_rif_params common;
123 struct mlxsw_sp_rif_ipip_lb_config lb_config;
126 struct mlxsw_sp_rif_ops {
127 enum mlxsw_sp_rif_type type;
130 void (*setup)(struct mlxsw_sp_rif *rif,
131 const struct mlxsw_sp_rif_params *params);
132 int (*configure)(struct mlxsw_sp_rif *rif);
133 void (*deconfigure)(struct mlxsw_sp_rif *rif);
134 struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
135 struct netlink_ext_ack *extack);
136 void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
139 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
140 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
141 struct mlxsw_sp_lpm_tree *lpm_tree);
142 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
143 const struct mlxsw_sp_fib *fib,
145 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
146 const struct mlxsw_sp_fib *fib);
148 static unsigned int *
149 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
150 enum mlxsw_sp_rif_counter_dir dir)
153 case MLXSW_SP_RIF_COUNTER_EGRESS:
154 return &rif->counter_egress;
155 case MLXSW_SP_RIF_COUNTER_INGRESS:
156 return &rif->counter_ingress;
162 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
163 enum mlxsw_sp_rif_counter_dir dir)
166 case MLXSW_SP_RIF_COUNTER_EGRESS:
167 return rif->counter_egress_valid;
168 case MLXSW_SP_RIF_COUNTER_INGRESS:
169 return rif->counter_ingress_valid;
175 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
176 enum mlxsw_sp_rif_counter_dir dir,
180 case MLXSW_SP_RIF_COUNTER_EGRESS:
181 rif->counter_egress_valid = valid;
183 case MLXSW_SP_RIF_COUNTER_INGRESS:
184 rif->counter_ingress_valid = valid;
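/* Bind or unbind the flow counter with the given index to a RIF in the given
 * direction. The RITR register is queried first and then written back with
 * only the counter binding changed.
 */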
189 static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
190 unsigned int counter_index, bool enable,
191 enum mlxsw_sp_rif_counter_dir dir)
193 char ritr_pl[MLXSW_REG_RITR_LEN];
194 bool is_egress = false;
197 if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
199 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
200 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
204 mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
206 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
209 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
210 struct mlxsw_sp_rif *rif,
211 enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
213 char ricnt_pl[MLXSW_REG_RICNT_LEN];
214 unsigned int *p_counter_index;
218 valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
222 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
223 if (!p_counter_index)
225 mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
226 MLXSW_REG_RICNT_OPCODE_NOP);
227 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
230 *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
234 static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
235 unsigned int counter_index)
237 char ricnt_pl[MLXSW_REG_RICNT_LEN];
239 mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
240 MLXSW_REG_RICNT_OPCODE_CLEAR);
241 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
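/* Allocate a counter from the RIF sub-pool, clear it and bind it to the RIF;
 * the direction is marked as having a valid counter only once all of these
 * steps succeed.
 */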
244 int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
245 struct mlxsw_sp_rif *rif,
246 enum mlxsw_sp_rif_counter_dir dir)
248 unsigned int *p_counter_index;
251 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
252 if (!p_counter_index)
254 err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
259 err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
261 goto err_counter_clear;
263 err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
264 *p_counter_index, true, dir);
266 goto err_counter_edit;
267 mlxsw_sp_rif_counter_valid_set(rif, dir, true);
272 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
277 void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
278 struct mlxsw_sp_rif *rif,
279 enum mlxsw_sp_rif_counter_dir dir)
281 unsigned int *p_counter_index;
283 if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
286 p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
287 if (WARN_ON(!p_counter_index))
289 mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
290 *p_counter_index, false, dir);
291 mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
293 mlxsw_sp_rif_counter_valid_set(rif, dir, false);
296 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
298 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
299 struct devlink *devlink;
301 devlink = priv_to_devlink(mlxsw_sp->core);
302 if (!devlink_dpipe_table_counter_enabled(devlink,
303 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
305 mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
308 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
310 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
312 mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
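/* Prefix lengths 0 through 128 (the longest possible IPv6 prefix) must all be
 * representable, hence 128 + 1 = 129 bits in the usage bitmap.
 */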
315 #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
317 struct mlxsw_sp_prefix_usage {
318 DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
321 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
322 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
325 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
326 struct mlxsw_sp_prefix_usage *prefix_usage2)
328 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
332 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
333 struct mlxsw_sp_prefix_usage *prefix_usage2)
335 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
339 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
340 unsigned char prefix_len)
342 set_bit(prefix_len, prefix_usage->b);
346 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
347 unsigned char prefix_len)
349 clear_bit(prefix_len, prefix_usage->b);
352 struct mlxsw_sp_fib_key {
353 unsigned char addr[sizeof(struct in6_addr)];
354 unsigned char prefix_len;
357 enum mlxsw_sp_fib_entry_type {
358 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
359 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
360 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
362 /* This is a special case of local delivery, where a packet should be
363 * decapsulated on reception. Note that there is no corresponding ENCAP,
364 * because that's a type of next hop, not of FIB entry. (There can be
365 * several next hops in a REMOTE entry, and some of them may be
366 * encapsulating entries.)
368 MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
371 struct mlxsw_sp_nexthop_group;
373 struct mlxsw_sp_fib_node {
374 struct list_head entry_list;
375 struct list_head list;
376 struct rhash_head ht_node;
377 struct mlxsw_sp_fib *fib;
378 struct mlxsw_sp_fib_key key;
381 struct mlxsw_sp_fib_entry_decap {
382 struct mlxsw_sp_ipip_entry *ipip_entry;
386 struct mlxsw_sp_fib_entry {
387 struct list_head list;
388 struct mlxsw_sp_fib_node *fib_node;
389 enum mlxsw_sp_fib_entry_type type;
390 struct list_head nexthop_group_node;
391 struct mlxsw_sp_nexthop_group *nh_group;
392 struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
395 struct mlxsw_sp_fib4_entry {
396 struct mlxsw_sp_fib_entry common;
403 struct mlxsw_sp_fib6_entry {
404 struct mlxsw_sp_fib_entry common;
405 struct list_head rt6_list;
409 struct mlxsw_sp_rt6 {
410 struct list_head list;
411 struct fib6_info *rt;
414 struct mlxsw_sp_lpm_tree {
416 unsigned int ref_count;
417 enum mlxsw_sp_l3proto proto;
418 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
419 struct mlxsw_sp_prefix_usage prefix_usage;
422 struct mlxsw_sp_fib {
423 struct rhashtable ht;
424 struct list_head node_list;
425 struct mlxsw_sp_vr *vr;
426 struct mlxsw_sp_lpm_tree *lpm_tree;
427 enum mlxsw_sp_l3proto proto;
431 u16 id; /* virtual router ID */
432 u32 tb_id; /* kernel fib table id */
433 unsigned int rif_count;
434 struct mlxsw_sp_fib *fib4;
435 struct mlxsw_sp_fib *fib6;
436 struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
439 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
441 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
442 struct mlxsw_sp_vr *vr,
443 enum mlxsw_sp_l3proto proto)
445 struct mlxsw_sp_lpm_tree *lpm_tree;
446 struct mlxsw_sp_fib *fib;
449 lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
450 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
452 return ERR_PTR(-ENOMEM);
453 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
455 goto err_rhashtable_init;
456 INIT_LIST_HEAD(&fib->node_list);
459 fib->lpm_tree = lpm_tree;
460 mlxsw_sp_lpm_tree_hold(lpm_tree);
461 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
463 goto err_lpm_tree_bind;
467 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
473 static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
474 struct mlxsw_sp_fib *fib)
476 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
477 mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
478 WARN_ON(!list_empty(&fib->node_list));
479 rhashtable_destroy(&fib->ht);
483 static struct mlxsw_sp_lpm_tree *
484 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
486 struct mlxsw_sp_lpm_tree *lpm_tree;
489 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
490 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
491 if (lpm_tree->ref_count == 0)
497 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
498 struct mlxsw_sp_lpm_tree *lpm_tree)
500 char ralta_pl[MLXSW_REG_RALTA_LEN];
502 mlxsw_reg_ralta_pack(ralta_pl, true,
503 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
505 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
508 static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
509 struct mlxsw_sp_lpm_tree *lpm_tree)
511 char ralta_pl[MLXSW_REG_RALTA_LEN];
513 mlxsw_reg_ralta_pack(ralta_pl, false,
514 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
516 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
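/* Describe the tree structure to the device via RALST: walk the used prefix
 * lengths to choose the root bin, then link each used prefix length (except
 * zero) to the previously visited one.
 */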
520 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
521 struct mlxsw_sp_prefix_usage *prefix_usage,
522 struct mlxsw_sp_lpm_tree *lpm_tree)
524 char ralst_pl[MLXSW_REG_RALST_LEN];
527 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
529 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
532 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
533 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
536 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
537 MLXSW_REG_RALST_BIN_NO_CHILD);
538 last_prefix = prefix;
540 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
543 static struct mlxsw_sp_lpm_tree *
544 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
545 struct mlxsw_sp_prefix_usage *prefix_usage,
546 enum mlxsw_sp_l3proto proto)
548 struct mlxsw_sp_lpm_tree *lpm_tree;
551 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
553 return ERR_PTR(-EBUSY);
554 lpm_tree->proto = proto;
555 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
559 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
562 goto err_left_struct_set;
563 memcpy(&lpm_tree->prefix_usage, prefix_usage,
564 sizeof(lpm_tree->prefix_usage));
565 memset(&lpm_tree->prefix_ref_count, 0,
566 sizeof(lpm_tree->prefix_ref_count));
567 lpm_tree->ref_count = 1;
571 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
575 static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
576 struct mlxsw_sp_lpm_tree *lpm_tree)
578 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
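/* Find an in-use tree whose protocol and prefix usage already match and take
 * a reference on it, or create a new tree if none matches.
 */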
581 static struct mlxsw_sp_lpm_tree *
582 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
583 struct mlxsw_sp_prefix_usage *prefix_usage,
584 enum mlxsw_sp_l3proto proto)
586 struct mlxsw_sp_lpm_tree *lpm_tree;
589 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
590 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
591 if (lpm_tree->ref_count != 0 &&
592 lpm_tree->proto == proto &&
593 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
595 mlxsw_sp_lpm_tree_hold(lpm_tree);
599 return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
602 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
604 lpm_tree->ref_count++;
607 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
608 struct mlxsw_sp_lpm_tree *lpm_tree)
610 if (--lpm_tree->ref_count == 0)
611 mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
614 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
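/* Discover how many LPM trees the device supports, manage trees starting at
 * MLXSW_SP_LPM_TREE_MIN, and take one default tree per protocol so that every
 * FIB can always be bound to a tree.
 */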
616 static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
618 struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
619 struct mlxsw_sp_lpm_tree *lpm_tree;
623 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
626 max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
627 mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
628 mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
629 sizeof(struct mlxsw_sp_lpm_tree),
631 if (!mlxsw_sp->router->lpm.trees)
634 for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
635 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
636 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
639 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
640 MLXSW_SP_L3_PROTO_IPV4);
641 if (IS_ERR(lpm_tree)) {
642 err = PTR_ERR(lpm_tree);
643 goto err_ipv4_tree_get;
645 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
647 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
648 MLXSW_SP_L3_PROTO_IPV6);
649 if (IS_ERR(lpm_tree)) {
650 err = PTR_ERR(lpm_tree);
651 goto err_ipv6_tree_get;
653 mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
658 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
659 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
661 kfree(mlxsw_sp->router->lpm.trees);
665 static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
667 struct mlxsw_sp_lpm_tree *lpm_tree;
669 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
670 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
672 lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
673 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
675 kfree(mlxsw_sp->router->lpm.trees);
678 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
680 return !!vr->fib4 || !!vr->fib6 ||
681 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
682 !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
685 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
687 struct mlxsw_sp_vr *vr;
690 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
691 vr = &mlxsw_sp->router->vrs[i];
692 if (!mlxsw_sp_vr_is_used(vr))
698 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
699 const struct mlxsw_sp_fib *fib, u8 tree_id)
701 char raltb_pl[MLXSW_REG_RALTB_LEN];
703 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
704 (enum mlxsw_reg_ralxx_protocol) fib->proto,
706 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
709 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
710 const struct mlxsw_sp_fib *fib)
712 char raltb_pl[MLXSW_REG_RALTB_LEN];
714 /* Bind to tree 0, which is the default. */
715 mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
716 (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
717 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
720 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
722 /* For our purpose, squash main, default and local tables into one */
723 if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
724 tb_id = RT_TABLE_MAIN;
728 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
731 struct mlxsw_sp_vr *vr;
734 tb_id = mlxsw_sp_fix_tb_id(tb_id);
736 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
737 vr = &mlxsw_sp->router->vrs[i];
738 if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
744 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
745 enum mlxsw_sp_l3proto proto)
748 case MLXSW_SP_L3_PROTO_IPV4:
750 case MLXSW_SP_L3_PROTO_IPV6:
756 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
758 struct netlink_ext_ack *extack)
760 struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
761 struct mlxsw_sp_fib *fib4;
762 struct mlxsw_sp_fib *fib6;
763 struct mlxsw_sp_vr *vr;
766 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
768 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
769 return ERR_PTR(-EBUSY);
771 fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
773 return ERR_CAST(fib4);
774 fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
777 goto err_fib6_create;
779 mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
780 MLXSW_SP_L3_PROTO_IPV4);
781 if (IS_ERR(mr4_table)) {
782 err = PTR_ERR(mr4_table);
783 goto err_mr4_table_create;
785 mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
786 MLXSW_SP_L3_PROTO_IPV6);
787 if (IS_ERR(mr6_table)) {
788 err = PTR_ERR(mr6_table);
789 goto err_mr6_table_create;
794 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
795 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
799 err_mr6_table_create:
800 mlxsw_sp_mr_table_destroy(mr4_table);
801 err_mr4_table_create:
802 mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
804 mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
808 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
809 struct mlxsw_sp_vr *vr)
811 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
812 vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
813 mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
814 vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
815 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
817 mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
821 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
822 struct netlink_ext_ack *extack)
824 struct mlxsw_sp_vr *vr;
826 tb_id = mlxsw_sp_fix_tb_id(tb_id);
827 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
829 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
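/* Drop a user of the virtual router. It is destroyed only once it has no
 * RIFs, no IPv4 or IPv6 FIB nodes and no multicast routes left.
 */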
833 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
835 if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
836 list_empty(&vr->fib6->node_list) &&
837 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
838 mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
839 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
843 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
844 enum mlxsw_sp_l3proto proto, u8 tree_id)
846 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
848 if (!mlxsw_sp_vr_is_used(vr))
850 if (fib->lpm_tree->id == tree_id)
855 static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
856 struct mlxsw_sp_fib *fib,
857 struct mlxsw_sp_lpm_tree *new_tree)
859 struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
862 fib->lpm_tree = new_tree;
863 mlxsw_sp_lpm_tree_hold(new_tree);
864 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
867 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
871 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
872 fib->lpm_tree = old_tree;
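/* Rebind every virtual router that still uses the old per-protocol default
 * tree to the new one, rolling back on failure, and then record the new tree
 * as the protocol's default.
 */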
876 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
877 struct mlxsw_sp_fib *fib,
878 struct mlxsw_sp_lpm_tree *new_tree)
880 enum mlxsw_sp_l3proto proto = fib->proto;
881 struct mlxsw_sp_lpm_tree *old_tree;
882 u8 old_id, new_id = new_tree->id;
883 struct mlxsw_sp_vr *vr;
886 old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
887 old_id = old_tree->id;
889 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
890 vr = &mlxsw_sp->router->vrs[i];
891 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
893 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
894 mlxsw_sp_vr_fib(vr, proto),
897 goto err_tree_replace;
900 memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
901 sizeof(new_tree->prefix_ref_count));
902 mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
903 mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
908 for (i--; i >= 0; i--) {
909 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
911 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
912 mlxsw_sp_vr_fib(vr, proto),
918 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
920 struct mlxsw_sp_vr *vr;
924 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
927 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
928 mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
930 if (!mlxsw_sp->router->vrs)
933 for (i = 0; i < max_vrs; i++) {
934 vr = &mlxsw_sp->router->vrs[i];
941 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
943 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
945 /* At this stage we're guaranteed not to have new incoming
946 * FIB notifications and the work queue is free from FIBs
947 * sitting on top of mlxsw netdevs. However, we can still
948 * have other FIBs queued. Flush the queue before flushing
949 * the device's tables. No need for locks, as we're the only
952 mlxsw_core_flush_owq();
953 mlxsw_sp_router_fib_flush(mlxsw_sp);
954 kfree(mlxsw_sp->router->vrs);
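/* Look up the underlay device of an IPIP overlay netdevice, i.e. the device
 * the tunnel is bound to via its link parameter, if any.
 */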
957 static struct net_device *
958 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
960 struct ip_tunnel *tun = netdev_priv(ol_dev);
961 struct net *net = dev_net(ol_dev);
963 return __dev_get_by_index(net, tun->parms.link);
966 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
968 struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
971 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
973 return RT_TABLE_MAIN;
976 static struct mlxsw_sp_rif *
977 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
978 const struct mlxsw_sp_rif_params *params,
979 struct netlink_ext_ack *extack);
981 static struct mlxsw_sp_rif_ipip_lb *
982 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
983 enum mlxsw_sp_ipip_type ipipt,
984 struct net_device *ol_dev,
985 struct netlink_ext_ack *extack)
987 struct mlxsw_sp_rif_params_ipip_lb lb_params;
988 const struct mlxsw_sp_ipip_ops *ipip_ops;
989 struct mlxsw_sp_rif *rif;
991 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
992 lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
993 .common.dev = ol_dev,
995 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
998 rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1000 return ERR_CAST(rif);
1001 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1004 static struct mlxsw_sp_ipip_entry *
1005 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1006 enum mlxsw_sp_ipip_type ipipt,
1007 struct net_device *ol_dev)
1009 const struct mlxsw_sp_ipip_ops *ipip_ops;
1010 struct mlxsw_sp_ipip_entry *ipip_entry;
1011 struct mlxsw_sp_ipip_entry *ret = NULL;
1013 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1014 ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1016 return ERR_PTR(-ENOMEM);
1018 ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1020 if (IS_ERR(ipip_entry->ol_lb)) {
1021 ret = ERR_CAST(ipip_entry->ol_lb);
1022 goto err_ol_ipip_lb_create;
1025 ipip_entry->ipipt = ipipt;
1026 ipip_entry->ol_dev = ol_dev;
1028 switch (ipip_ops->ul_proto) {
1029 case MLXSW_SP_L3_PROTO_IPV4:
1030 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1032 case MLXSW_SP_L3_PROTO_IPV6:
1039 err_ol_ipip_lb_create:
1045 mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1047 mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1052 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1053 const enum mlxsw_sp_l3proto ul_proto,
1054 union mlxsw_sp_l3addr saddr,
1056 struct mlxsw_sp_ipip_entry *ipip_entry)
1058 u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1059 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1060 union mlxsw_sp_l3addr tun_saddr;
1062 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1065 tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1066 return tun_ul_tb_id == ul_tb_id &&
1067 mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
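/* Allocate an adjacency (KVDL) entry for the tunnel and cross-link the FIB
 * entry with its IPIP entry, so that the route can be programmed as an IPIP
 * decap entry.
 */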
1071 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1072 struct mlxsw_sp_fib_entry *fib_entry,
1073 struct mlxsw_sp_ipip_entry *ipip_entry)
1078 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1083 ipip_entry->decap_fib_entry = fib_entry;
1084 fib_entry->decap.ipip_entry = ipip_entry;
1085 fib_entry->decap.tunnel_index = tunnel_index;
1089 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1090 struct mlxsw_sp_fib_entry *fib_entry)
1092 /* Unlink this node from the IPIP entry that it's the decap entry of. */
1093 fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1094 fib_entry->decap.ipip_entry = NULL;
1095 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1096 1, fib_entry->decap.tunnel_index);
1099 static struct mlxsw_sp_fib_node *
1100 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1101 size_t addr_len, unsigned char prefix_len);
1102 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1103 struct mlxsw_sp_fib_entry *fib_entry);
1106 mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1107 struct mlxsw_sp_ipip_entry *ipip_entry)
1109 struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1111 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1112 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1114 mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1118 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1119 struct mlxsw_sp_ipip_entry *ipip_entry,
1120 struct mlxsw_sp_fib_entry *decap_fib_entry)
1122 if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1125 decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1127 if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1128 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1131 /* Given an IPIP entry, find the corresponding decap route. */
1132 static struct mlxsw_sp_fib_entry *
1133 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1134 struct mlxsw_sp_ipip_entry *ipip_entry)
1136 struct mlxsw_sp_fib_node *fib_node;
1137 const struct mlxsw_sp_ipip_ops *ipip_ops;
1138 struct mlxsw_sp_fib_entry *fib_entry;
1139 unsigned char saddr_prefix_len;
1140 union mlxsw_sp_l3addr saddr;
1141 struct mlxsw_sp_fib *ul_fib;
1142 struct mlxsw_sp_vr *ul_vr;
1148 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1150 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1151 ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1155 ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1156 saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1157 ipip_entry->ol_dev);
1159 switch (ipip_ops->ul_proto) {
1160 case MLXSW_SP_L3_PROTO_IPV4:
1161 saddr4 = be32_to_cpu(saddr.addr4);
1164 saddr_prefix_len = 32;
1166 case MLXSW_SP_L3_PROTO_IPV6:
1171 fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1173 if (!fib_node || list_empty(&fib_node->entry_list))
1176 fib_entry = list_first_entry(&fib_node->entry_list,
1177 struct mlxsw_sp_fib_entry, list);
1178 if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1184 static struct mlxsw_sp_ipip_entry *
1185 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1186 enum mlxsw_sp_ipip_type ipipt,
1187 struct net_device *ol_dev)
1189 struct mlxsw_sp_ipip_entry *ipip_entry;
1191 ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1192 if (IS_ERR(ipip_entry))
1195 list_add_tail(&ipip_entry->ipip_list_node,
1196 &mlxsw_sp->router->ipip_list);
1202 mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1203 struct mlxsw_sp_ipip_entry *ipip_entry)
1205 list_del(&ipip_entry->ipip_list_node);
1206 mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1210 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1211 const struct net_device *ul_dev,
1212 enum mlxsw_sp_l3proto ul_proto,
1213 union mlxsw_sp_l3addr ul_dip,
1214 struct mlxsw_sp_ipip_entry *ipip_entry)
1216 u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1217 enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1219 if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1222 return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1223 ul_tb_id, ipip_entry);
1226 /* Given decap parameters, find the corresponding IPIP entry. */
1227 static struct mlxsw_sp_ipip_entry *
1228 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1229 const struct net_device *ul_dev,
1230 enum mlxsw_sp_l3proto ul_proto,
1231 union mlxsw_sp_l3addr ul_dip)
1233 struct mlxsw_sp_ipip_entry *ipip_entry;
1235 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1237 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1245 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1246 const struct net_device *dev,
1247 enum mlxsw_sp_ipip_type *p_type)
1249 struct mlxsw_sp_router *router = mlxsw_sp->router;
1250 const struct mlxsw_sp_ipip_ops *ipip_ops;
1251 enum mlxsw_sp_ipip_type ipipt;
1253 for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1254 ipip_ops = router->ipip_ops_arr[ipipt];
1255 if (dev->type == ipip_ops->dev_type) {
1264 bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1265 const struct net_device *dev)
1267 return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1270 static struct mlxsw_sp_ipip_entry *
1271 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1272 const struct net_device *ol_dev)
1274 struct mlxsw_sp_ipip_entry *ipip_entry;
1276 list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1278 if (ipip_entry->ol_dev == ol_dev)
1284 static struct mlxsw_sp_ipip_entry *
1285 mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1286 const struct net_device *ul_dev,
1287 struct mlxsw_sp_ipip_entry *start)
1289 struct mlxsw_sp_ipip_entry *ipip_entry;
1291 ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1293 list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1295 struct net_device *ipip_ul_dev =
1296 __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1298 if (ipip_ul_dev == ul_dev)
1305 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1306 const struct net_device *dev)
1308 return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1311 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1312 const struct net_device *ol_dev,
1313 enum mlxsw_sp_ipip_type ipipt)
1315 const struct mlxsw_sp_ipip_ops *ops
1316 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1318 /* For deciding whether decap should be offloaded, we don't care about
1319 * overlay protocol, so ask whether either one is supported.
1321 return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1322 ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
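/* NETDEV_REGISTER handler for an IPIP overlay device: offload the tunnel only
 * if it is offloadable and no already-offloaded tunnel uses the same local
 * address in the same underlay table; in the latter case the conflicting
 * tunnel is demoted and neither is offloaded.
 */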
1325 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1326 struct net_device *ol_dev)
1328 struct mlxsw_sp_ipip_entry *ipip_entry;
1329 enum mlxsw_sp_l3proto ul_proto;
1330 enum mlxsw_sp_ipip_type ipipt;
1331 union mlxsw_sp_l3addr saddr;
1334 mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1335 if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1336 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1337 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1338 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1339 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1342 ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1344 if (IS_ERR(ipip_entry))
1345 return PTR_ERR(ipip_entry);
1352 static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1353 struct net_device *ol_dev)
1355 struct mlxsw_sp_ipip_entry *ipip_entry;
1357 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1359 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1363 mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1364 struct mlxsw_sp_ipip_entry *ipip_entry)
1366 struct mlxsw_sp_fib_entry *decap_fib_entry;
1368 decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1369 if (decap_fib_entry)
1370 mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1375 mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
1376 struct mlxsw_sp_vr *ul_vr, bool enable)
1378 struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1379 struct mlxsw_sp_rif *rif = &lb_rif->common;
1380 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1381 char ritr_pl[MLXSW_REG_RITR_LEN];
1384 switch (lb_cf.ul_protocol) {
1385 case MLXSW_SP_L3_PROTO_IPV4:
1386 saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1387 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1388 rif->rif_index, rif->vr_id, rif->dev->mtu);
1389 mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1390 MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1391 ul_vr->id, saddr4, lb_cf.okey);
1394 case MLXSW_SP_L3_PROTO_IPV6:
1395 return -EAFNOSUPPORT;
1398 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1401 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1402 struct net_device *ol_dev)
1404 struct mlxsw_sp_ipip_entry *ipip_entry;
1405 struct mlxsw_sp_rif_ipip_lb *lb_rif;
1406 struct mlxsw_sp_vr *ul_vr;
1409 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1411 lb_rif = ipip_entry->ol_lb;
1412 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1413 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1416 lb_rif->common.mtu = ol_dev->mtu;
1423 static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1424 struct net_device *ol_dev)
1426 struct mlxsw_sp_ipip_entry *ipip_entry;
1428 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1430 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1434 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1435 struct mlxsw_sp_ipip_entry *ipip_entry)
1437 if (ipip_entry->decap_fib_entry)
1438 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1441 static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1442 struct net_device *ol_dev)
1444 struct mlxsw_sp_ipip_entry *ipip_entry;
1446 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1448 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1451 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1452 struct mlxsw_sp_rif *old_rif,
1453 struct mlxsw_sp_rif *new_rif);
1455 mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1456 struct mlxsw_sp_ipip_entry *ipip_entry,
1458 struct netlink_ext_ack *extack)
1460 struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1461 struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1463 new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1467 if (IS_ERR(new_lb_rif))
1468 return PTR_ERR(new_lb_rif);
1469 ipip_entry->ol_lb = new_lb_rif;
1472 mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1473 &new_lb_rif->common);
1475 mlxsw_sp_rif_destroy(&old_lb_rif->common);
1480 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1481 struct mlxsw_sp_rif *rif);
1484 * Update the offload related to an IPIP entry. This always updates decap, and
1485 * in addition to that it also:
1486 * @recreate_loopback: recreates the associated loopback RIF
1487 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1488 * relevant when recreate_loopback is true.
1489 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1490 * is only relevant when recreate_loopback is false.
1492 int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1493 struct mlxsw_sp_ipip_entry *ipip_entry,
1494 bool recreate_loopback,
1496 bool update_nexthops,
1497 struct netlink_ext_ack *extack)
1501 /* RIFs can't be edited, so to update loopback, we need to destroy and
1502 * recreate it. That creates a window of opportunity where RALUE and
1503 * RATR registers end up referencing a RIF that's already gone. RATRs
1504 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1505 * of RALUE, demote the decap route back.
1507 if (ipip_entry->decap_fib_entry)
1508 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1510 if (recreate_loopback) {
1511 err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1512 keep_encap, extack);
1515 } else if (update_nexthops) {
1516 mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1517 &ipip_entry->ol_lb->common);
1520 if (ipip_entry->ol_dev->flags & IFF_UP)
1521 mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1526 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1527 struct net_device *ol_dev,
1528 struct netlink_ext_ack *extack)
1530 struct mlxsw_sp_ipip_entry *ipip_entry =
1531 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1536 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1537 true, false, false, extack);
1541 mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1542 struct mlxsw_sp_ipip_entry *ipip_entry,
1543 struct net_device *ul_dev,
1544 struct netlink_ext_ack *extack)
1546 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1547 true, true, false, extack);
1551 mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1552 struct mlxsw_sp_ipip_entry *ipip_entry,
1553 struct net_device *ul_dev)
1555 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1556 false, false, true, NULL);
1560 mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1561 struct mlxsw_sp_ipip_entry *ipip_entry,
1562 struct net_device *ul_dev)
1564 /* A down underlay device causes encapsulated packets to not be
1565 * forwarded, but decap still works. So refresh next hops without
1566 * touching anything else.
1568 return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1569 false, false, true, NULL);
1573 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1574 struct net_device *ol_dev,
1575 struct netlink_ext_ack *extack)
1577 const struct mlxsw_sp_ipip_ops *ipip_ops;
1578 struct mlxsw_sp_ipip_entry *ipip_entry;
1581 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1583 /* A change might make a tunnel eligible for offloading, but
1584 * that is currently not implemented. What falls to slow path
1589 /* A change might make a tunnel not eligible for offloading. */
1590 if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1591 ipip_entry->ipipt)) {
1592 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1596 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1597 err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1601 void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1602 struct mlxsw_sp_ipip_entry *ipip_entry)
1604 struct net_device *ol_dev = ipip_entry->ol_dev;
1606 if (ol_dev->flags & IFF_UP)
1607 mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1608 mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1611 /* The configuration where several tunnels have the same local address in the
1612 * same underlay table needs special treatment in the HW. That is currently not
1613 * implemented in the driver. This function finds and demotes the first tunnel
1614 * with a given source address, except the one passed in via the argument
1618 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1619 enum mlxsw_sp_l3proto ul_proto,
1620 union mlxsw_sp_l3addr saddr,
1622 const struct mlxsw_sp_ipip_entry *except)
1624 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1626 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1628 if (ipip_entry != except &&
1629 mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1630 ul_tb_id, ipip_entry)) {
1631 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1639 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1640 struct net_device *ul_dev)
1642 struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1644 list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1646 struct net_device *ipip_ul_dev =
1647 __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1649 if (ipip_ul_dev == ul_dev)
1650 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
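/* Dispatch netdevice events for IPIP overlay devices: REGISTER / UNREGISTER
 * create and destroy the IPIP entry, UP / DOWN promote and demote the decap
 * route, moving under an L3 master rebuilds the loopback RIF, and CHANGE /
 * CHANGEMTU update the offloaded tunnel parameters.
 */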
1654 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1655 struct net_device *ol_dev,
1656 unsigned long event,
1657 struct netdev_notifier_info *info)
1659 struct netdev_notifier_changeupper_info *chup;
1660 struct netlink_ext_ack *extack;
1663 case NETDEV_REGISTER:
1664 return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1665 case NETDEV_UNREGISTER:
1666 mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1669 mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1672 mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1674 case NETDEV_CHANGEUPPER:
1675 chup = container_of(info, typeof(*chup), info);
1676 extack = info->extack;
1677 if (netif_is_l3_master(chup->upper_dev))
1678 return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1683 extack = info->extack;
1684 return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1686 case NETDEV_CHANGEMTU:
1687 return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1693 __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1694 struct mlxsw_sp_ipip_entry *ipip_entry,
1695 struct net_device *ul_dev,
1696 unsigned long event,
1697 struct netdev_notifier_info *info)
1699 struct netdev_notifier_changeupper_info *chup;
1700 struct netlink_ext_ack *extack;
1703 case NETDEV_CHANGEUPPER:
1704 chup = container_of(info, typeof(*chup), info);
1705 extack = info->extack;
1706 if (netif_is_l3_master(chup->upper_dev))
1707 return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1714 return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1717 return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1725 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1726 struct net_device *ul_dev,
1727 unsigned long event,
1728 struct netdev_notifier_info *info)
1730 struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1733 while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1736 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1737 ul_dev, event, info);
1739 mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1748 struct mlxsw_sp_neigh_key {
1749 struct neighbour *n;
1752 struct mlxsw_sp_neigh_entry {
1753 struct list_head rif_list_node;
1754 struct rhash_head ht_node;
1755 struct mlxsw_sp_neigh_key key;
1758 unsigned char ha[ETH_ALEN];
1759 struct list_head nexthop_list; /* list of nexthops using
1762 struct list_head nexthop_neighs_list_node;
1763 unsigned int counter_index;
1767 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1768 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1769 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1770 .key_len = sizeof(struct mlxsw_sp_neigh_key),
1773 struct mlxsw_sp_neigh_entry *
1774 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1775 struct mlxsw_sp_neigh_entry *neigh_entry)
1778 if (list_empty(&rif->neigh_list))
1781 return list_first_entry(&rif->neigh_list,
1782 typeof(*neigh_entry),
1785 if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1787 return list_next_entry(neigh_entry, rif_list_node);
1790 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1792 return neigh_entry->key.n->tbl->family;
1796 mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1798 return neigh_entry->ha;
1801 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1803 struct neighbour *n;
1805 n = neigh_entry->key.n;
1806 return ntohl(*((__be32 *) n->primary_key));
1810 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1812 struct neighbour *n;
1814 n = neigh_entry->key.n;
1815 return (struct in6_addr *) &n->primary_key;
1818 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1819 struct mlxsw_sp_neigh_entry *neigh_entry,
1822 if (!neigh_entry->counter_valid)
1825 return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1829 static struct mlxsw_sp_neigh_entry *
1830 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1833 struct mlxsw_sp_neigh_entry *neigh_entry;
1835 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1839 neigh_entry->key.n = n;
1840 neigh_entry->rif = rif;
1841 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1846 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
1852 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1853 struct mlxsw_sp_neigh_entry *neigh_entry)
1855 return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1856 &neigh_entry->ht_node,
1857 mlxsw_sp_neigh_ht_params);
1861 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1862 struct mlxsw_sp_neigh_entry *neigh_entry)
1864 rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
1865 &neigh_entry->ht_node,
1866 mlxsw_sp_neigh_ht_params);
1870 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1871 struct mlxsw_sp_neigh_entry *neigh_entry)
1873 struct devlink *devlink;
1874 const char *table_name;
1876 switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1878 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1881 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1888 devlink = priv_to_devlink(mlxsw_sp->core);
1889 return devlink_dpipe_table_counter_enabled(devlink, table_name);
1893 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1894 struct mlxsw_sp_neigh_entry *neigh_entry)
1896 if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1899 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1902 neigh_entry->counter_valid = true;
1906 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1907 struct mlxsw_sp_neigh_entry *neigh_entry)
1909 if (!neigh_entry->counter_valid)
1911 mlxsw_sp_flow_counter_free(mlxsw_sp,
1912 neigh_entry->counter_index);
1913 neigh_entry->counter_valid = false;
1916 static struct mlxsw_sp_neigh_entry *
1917 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1919 struct mlxsw_sp_neigh_entry *neigh_entry;
1920 struct mlxsw_sp_rif *rif;
1923 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
1925 return ERR_PTR(-EINVAL);
1927 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
1929 return ERR_PTR(-ENOMEM);
1931 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
1933 goto err_neigh_entry_insert;
1935 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
1936 list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
1940 err_neigh_entry_insert:
1941 mlxsw_sp_neigh_entry_free(neigh_entry);
1942 return ERR_PTR(err);
1946 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1947 struct mlxsw_sp_neigh_entry *neigh_entry)
1949 list_del(&neigh_entry->rif_list_node);
1950 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
1951 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
1952 mlxsw_sp_neigh_entry_free(neigh_entry);
1955 static struct mlxsw_sp_neigh_entry *
1956 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1958 struct mlxsw_sp_neigh_key key;
1961 return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1962 &key, mlxsw_sp_neigh_ht_params);
1966 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
1968 unsigned long interval;
1970 #if IS_ENABLED(CONFIG_IPV6)
1971 interval = min_t(unsigned long,
1972 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
1973 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
1975 interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
1977 mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
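/* Process one IPv4 entry of a RAUHTD dump: resolve the reported RIF to its
 * netdevice, look the IP up in the kernel ARP table and send a neighbour
 * event so the kernel treats the entry as active.
 */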
1980 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
1984 struct net_device *dev;
1985 struct neighbour *n;
1990 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
1992 if (!mlxsw_sp->router->rifs[rif]) {
1993 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
1998 dev = mlxsw_sp->router->rifs[rif]->dev;
1999 n = neigh_lookup(&arp_tbl, &dipn, dev);
2003 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2004 neigh_event_send(n, NULL);
2008 #if IS_ENABLED(CONFIG_IPV6)
2009 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2013 struct net_device *dev;
2014 struct neighbour *n;
2015 struct in6_addr dip;
2018 mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2021 if (!mlxsw_sp->router->rifs[rif]) {
2022 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2026 dev = mlxsw_sp->router->rifs[rif]->dev;
2027 n = neigh_lookup(&nd_tbl, &dip, dev);
2031 netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2032 neigh_event_send(n, NULL);
2036 static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2043 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2050 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2052 /* Hardware starts counting at 0, so add 1. */
2055 /* Each record consists of several neighbour entries. */
2056 for (i = 0; i < num_entries; i++) {
2059 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2060 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2066 static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2070 /* One record contains one entry. */
2071 mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2075 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2076 char *rauhtd_pl, int rec_index)
2078 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2079 case MLXSW_REG_RAUHTD_TYPE_IPV4:
2080 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2083 case MLXSW_REG_RAUHTD_TYPE_IPV6:
2084 mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2090 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2092 u8 num_rec, last_rec_index, num_entries;
2094 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2095 last_rec_index = num_rec - 1;
2097 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2099 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2100 MLXSW_REG_RAUHTD_TYPE_IPV6)
2103 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2105 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2111 __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2113 enum mlxsw_reg_rauhtd_type type)
2118 /* Make sure the neighbour's netdev isn't removed in the
2123 mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2124 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2127 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2130 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2131 for (i = 0; i < num_rec; i++)
2132 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2134 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
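/* Dump the active neighbour entries from the device, first IPv4 and then
 * IPv6, and feed every returned record back to the kernel neighbour tables.
 */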
2140 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2142 enum mlxsw_reg_rauhtd_type type;
2146 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2150 type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2151 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2155 type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2156 err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2162 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2164 struct mlxsw_sp_neigh_entry *neigh_entry;
2166 /* Take RTNL mutex here to prevent the lists from changing. */
2168 list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2169 nexthop_neighs_list_node)
2170 /* If this neigh has nexthops, make the kernel think this neigh
2171 * is active regardless of the traffic.
2173 neigh_event_send(neigh_entry->key.n, NULL);
2178 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2180 unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2182 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2183 msecs_to_jiffies(interval));
2186 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2188 struct mlxsw_sp_router *router;
2191 router = container_of(work, struct mlxsw_sp_router,
2192 neighs_update.dw.work);
2193 err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2195 dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2197 mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2199 mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2202 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2204 struct mlxsw_sp_neigh_entry *neigh_entry;
2205 struct mlxsw_sp_router *router;
2207 router = container_of(work, struct mlxsw_sp_router,
2208 nexthop_probe_dw.work);
2209 /* Iterate over nexthop neighbours, find those that are unresolved and
2210 * send ARP on them. This solves the chicken-and-egg problem where
2211 * the nexthop would not get offloaded until the neighbour is resolved,
2212 * but the neighbour would never get resolved as long as traffic keeps
2213 * flowing in HW through a different nexthop.
2215 * Take RTNL mutex here to prevent the lists from changing.
2218 list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2219 nexthop_neighs_list_node)
2220 if (!neigh_entry->connected)
2221 neigh_event_send(neigh_entry->key.n, NULL);
2224 mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2225 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2229 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2230 struct mlxsw_sp_neigh_entry *neigh_entry,
2231 bool removing, bool dead);
2233 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2235 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2236 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
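/* Add or delete an IPv4 neighbour in the device's RAUHT table, attaching the
 * entry's flow counter when one has been allocated.
 */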
2240 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2241 struct mlxsw_sp_neigh_entry *neigh_entry,
2242 enum mlxsw_reg_rauht_op op)
2244 struct neighbour *n = neigh_entry->key.n;
2245 u32 dip = ntohl(*((__be32 *) n->primary_key));
2246 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2248 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2250 if (neigh_entry->counter_valid)
2251 mlxsw_reg_rauht_pack_counter(rauht_pl,
2252 neigh_entry->counter_index);
2253 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2257 mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2258 struct mlxsw_sp_neigh_entry *neigh_entry,
2259 enum mlxsw_reg_rauht_op op)
2261 struct neighbour *n = neigh_entry->key.n;
2262 char rauht_pl[MLXSW_REG_RAUHT_LEN];
2263 const char *dip = n->primary_key;
2265 mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2267 if (neigh_entry->counter_valid)
2268 mlxsw_reg_rauht_pack_counter(rauht_pl,
2269 neigh_entry->counter_index);
2270 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2273 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2275 struct neighbour *n = neigh_entry->key.n;
2277 /* Packets with a link-local destination address are trapped
2278 * after LPM lookup and never reach the neighbour table, so
2279 * there is no need to program such neighbours to the device.
2281 if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2282 IPV6_ADDR_LINKLOCAL)
2288 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2289 struct mlxsw_sp_neigh_entry *neigh_entry,
2292 if (!adding && !neigh_entry->connected)
2294 neigh_entry->connected = adding;
2295 if (neigh_entry->key.n->tbl->family == AF_INET) {
2296 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2297 mlxsw_sp_rauht_op(adding));
2298 } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2299 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2301 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2302 mlxsw_sp_rauht_op(adding));
2309 mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2310 struct mlxsw_sp_neigh_entry *neigh_entry,
2314 mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2316 mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2317 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2320 struct mlxsw_sp_netevent_work {
2321 struct work_struct work;
2322 struct mlxsw_sp *mlxsw_sp;
2323 struct neighbour *n;
2326 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2328 struct mlxsw_sp_netevent_work *net_work =
2329 container_of(work, struct mlxsw_sp_netevent_work, work);
2330 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2331 struct mlxsw_sp_neigh_entry *neigh_entry;
2332 struct neighbour *n = net_work->n;
2333 unsigned char ha[ETH_ALEN];
2334 bool entry_connected;
2337 /* If these parameters are changed after we release the lock,
2338 * then we are guaranteed to receive another event letting us know about it.
2341 read_lock_bh(&n->lock);
2342 memcpy(ha, n->ha, ETH_ALEN);
2343 nud_state = n->nud_state;
2345 read_unlock_bh(&n->lock);
2348 mlxsw_sp_span_respin(mlxsw_sp);
2350 entry_connected = nud_state & NUD_VALID && !dead;
2351 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2352 if (!entry_connected && !neigh_entry)
2355 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2356 if (IS_ERR(neigh_entry))
2360 memcpy(neigh_entry->ha, ha, ETH_ALEN);
2361 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2362 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2365 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2366 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2374 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2376 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2378 struct mlxsw_sp_netevent_work *net_work =
2379 container_of(work, struct mlxsw_sp_netevent_work, work);
2380 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2382 mlxsw_sp_mp_hash_init(mlxsw_sp);
2386 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2388 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2390 struct mlxsw_sp_netevent_work *net_work =
2391 container_of(work, struct mlxsw_sp_netevent_work, work);
2392 struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2394 __mlxsw_sp_router_init(mlxsw_sp);
2398 static int mlxsw_sp_router_schedule_work(struct net *net,
2399 struct notifier_block *nb,
2400 void (*cb)(struct work_struct *))
2402 struct mlxsw_sp_netevent_work *net_work;
2403 struct mlxsw_sp_router *router;
2405 if (!net_eq(net, &init_net))
2408 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2412 router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2413 INIT_WORK(&net_work->work, cb);
2414 net_work->mlxsw_sp = router->mlxsw_sp;
2415 mlxsw_core_schedule_work(&net_work->work);
2419 static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2420 unsigned long event, void *ptr)
2422 struct mlxsw_sp_netevent_work *net_work;
2423 struct mlxsw_sp_port *mlxsw_sp_port;
2424 struct mlxsw_sp *mlxsw_sp;
2425 unsigned long interval;
2426 struct neigh_parms *p;
2427 struct neighbour *n;
2430 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2433 /* We don't care about changes in the default table. */
2434 if (!p->dev || (p->tbl->family != AF_INET &&
2435 p->tbl->family != AF_INET6))
2438 /* We are in atomic context and can't take RTNL mutex,
2439 * so use RCU variant to walk the device chain.
2441 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2445 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2446 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2447 mlxsw_sp->router->neighs_update.interval = interval;
2449 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2451 case NETEVENT_NEIGH_UPDATE:
2454 if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2457 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2461 net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2463 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2467 INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2468 net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2471 /* Take a reference to ensure the neighbour won't be
2472 * destroyed until we drop the reference in the delayed work.
2476 mlxsw_core_schedule_work(&net_work->work);
2477 mlxsw_sp_port_dev_put(mlxsw_sp_port);
2479 case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2480 case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2481 return mlxsw_sp_router_schedule_work(ptr, nb,
2482 mlxsw_sp_router_mp_hash_event_work);
2484 case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2485 return mlxsw_sp_router_schedule_work(ptr, nb,
2486 mlxsw_sp_router_update_priority_work);
2492 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2496 err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2497 &mlxsw_sp_neigh_ht_params);
2501 /* Initialize the polling interval according to the default table. */
2504 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2506 /* Create the delayed works for the activity_update */
2507 INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2508 mlxsw_sp_router_neighs_update_work);
2509 INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2510 mlxsw_sp_router_probe_unresolved_nexthops);
2511 mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2512 mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2516 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2518 cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2519 cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2520 rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2523 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2524 struct mlxsw_sp_rif *rif)
2526 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2528 list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2530 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2531 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2535 enum mlxsw_sp_nexthop_type {
2536 MLXSW_SP_NEXTHOP_TYPE_ETH,
2537 MLXSW_SP_NEXTHOP_TYPE_IPIP,
2540 struct mlxsw_sp_nexthop_key {
2541 struct fib_nh *fib_nh;
2544 struct mlxsw_sp_nexthop {
2545 struct list_head neigh_list_node; /* member of neigh entry list */
2546 struct list_head rif_list_node;
2547 struct list_head router_list_node;
2548 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group this nexthop belongs to */
2551 struct rhash_head ht_node;
2552 struct mlxsw_sp_nexthop_key key;
2553 unsigned char gw_addr[sizeof(struct in6_addr)];
2557 int num_adj_entries;
2558 struct mlxsw_sp_rif *rif;
2559 u8 should_offload:1, /* set indicates this neigh is connected and
2560 * should be put to KVD linear area of this group.
2562 offloaded:1, /* set in case the neigh is actually put into
2563 * KVD linear area of this group.
2565 update:1; /* set indicates that the MAC of this neigh should be
2566 * updated in the adjacency table.
2568 enum mlxsw_sp_nexthop_type type;
2570 struct mlxsw_sp_neigh_entry *neigh_entry;
2571 struct mlxsw_sp_ipip_entry *ipip_entry;
2573 unsigned int counter_index;
2577 struct mlxsw_sp_nexthop_group {
2579 struct rhash_head ht_node;
2580 struct list_head fib_list; /* list of fib entries that use this group */
2581 struct neigh_table *neigh_tbl;
2582 u8 adj_index_valid:1,
2583 gateway:1; /* routes using the group use a gateway */
2587 int sum_norm_weight;
2588 struct mlxsw_sp_nexthop nexthops[0];
2589 #define nh_rif nexthops[0].rif
2592 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2593 struct mlxsw_sp_nexthop *nh)
2595 struct devlink *devlink;
2597 devlink = priv_to_devlink(mlxsw_sp->core);
2598 if (!devlink_dpipe_table_counter_enabled(devlink,
2599 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2602 if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2605 nh->counter_valid = true;
2608 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2609 struct mlxsw_sp_nexthop *nh)
2611 if (!nh->counter_valid)
2613 mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2614 nh->counter_valid = false;
2617 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2618 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2620 if (!nh->counter_valid)
2623 return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2627 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2628 struct mlxsw_sp_nexthop *nh)
2631 if (list_empty(&router->nexthop_list))
2634 return list_first_entry(&router->nexthop_list,
2635 typeof(*nh), router_list_node);
2637 if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2639 return list_next_entry(nh, router_list_node);
2642 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2644 return nh->offloaded;
2647 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2651 return nh->neigh_entry->ha;
2654 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2655 u32 *p_adj_size, u32 *p_adj_hash_index)
2657 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2658 u32 adj_hash_index = 0;
2661 if (!nh->offloaded || !nh_grp->adj_index_valid)
2664 *p_adj_index = nh_grp->adj_index;
2665 *p_adj_size = nh_grp->ecmp_size;
2667 for (i = 0; i < nh_grp->count; i++) {
2668 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2672 if (nh_iter->offloaded)
2673 adj_hash_index += nh_iter->num_adj_entries;
2676 *p_adj_hash_index = adj_hash_index;
2680 struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2685 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2687 struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2690 for (i = 0; i < nh_grp->count; i++) {
2691 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2693 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2699 static struct fib_info *
2700 mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2702 return nh_grp->priv;
2705 struct mlxsw_sp_nexthop_group_cmp_arg {
2706 enum mlxsw_sp_l3proto proto;
2708 struct fib_info *fi;
2709 struct mlxsw_sp_fib6_entry *fib6_entry;
2714 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2715 const struct in6_addr *gw, int ifindex,
2720 for (i = 0; i < nh_grp->count; i++) {
2721 const struct mlxsw_sp_nexthop *nh;
2723 nh = &nh_grp->nexthops[i];
2724 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2725 ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2733 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2734 const struct mlxsw_sp_fib6_entry *fib6_entry)
2736 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2738 if (nh_grp->count != fib6_entry->nrt6)
2741 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2742 struct in6_addr *gw;
2743 int ifindex, weight;
2745 ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2746 weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2747 gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2748 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2757 mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2759 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2760 const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2762 switch (cmp_arg->proto) {
2763 case MLXSW_SP_L3_PROTO_IPV4:
2764 return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2765 case MLXSW_SP_L3_PROTO_IPV6:
2766 return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2767 cmp_arg->fib6_entry);
2775 mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2777 return nh_grp->neigh_tbl->family;
2780 static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2782 const struct mlxsw_sp_nexthop_group *nh_grp = data;
2783 const struct mlxsw_sp_nexthop *nh;
2784 struct fib_info *fi;
2788 switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2790 fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2791 return jhash(&fi, sizeof(fi), seed);
2793 val = nh_grp->count;
2794 for (i = 0; i < nh_grp->count; i++) {
2795 nh = &nh_grp->nexthops[i];
2798 return jhash(&val, sizeof(val), seed);
2806 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2808 unsigned int val = fib6_entry->nrt6;
2809 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2810 struct net_device *dev;
2812 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2813 dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2814 val ^= dev->ifindex;
2817 return jhash(&val, sizeof(val), seed);
2821 mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2823 const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2825 switch (cmp_arg->proto) {
2826 case MLXSW_SP_L3_PROTO_IPV4:
2827 return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2828 case MLXSW_SP_L3_PROTO_IPV6:
2829 return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2836 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2837 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2838 .hashfn = mlxsw_sp_nexthop_group_hash,
2839 .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj,
2840 .obj_cmpfn = mlxsw_sp_nexthop_group_cmp,
2843 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2844 struct mlxsw_sp_nexthop_group *nh_grp)
2846 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2850 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2852 mlxsw_sp_nexthop_group_ht_params);
2855 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2856 struct mlxsw_sp_nexthop_group *nh_grp)
2858 if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2862 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2864 mlxsw_sp_nexthop_group_ht_params);
2867 static struct mlxsw_sp_nexthop_group *
2868 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2869 struct fib_info *fi)
2871 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2873 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2875 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2877 mlxsw_sp_nexthop_group_ht_params);
2880 static struct mlxsw_sp_nexthop_group *
2881 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2882 struct mlxsw_sp_fib6_entry *fib6_entry)
2884 struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2886 cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2887 cmp_arg.fib6_entry = fib6_entry;
2888 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2890 mlxsw_sp_nexthop_group_ht_params);
2893 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
2894 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
2895 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
2896 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
2899 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
2900 struct mlxsw_sp_nexthop *nh)
2902 return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
2903 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
2906 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
2907 struct mlxsw_sp_nexthop *nh)
2909 rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
2910 mlxsw_sp_nexthop_ht_params);
2913 static struct mlxsw_sp_nexthop *
2914 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
2915 struct mlxsw_sp_nexthop_key key)
2917 return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
2918 mlxsw_sp_nexthop_ht_params);
2921 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
2922 const struct mlxsw_sp_fib *fib,
2923 u32 adj_index, u16 ecmp_size,
2927 char raleu_pl[MLXSW_REG_RALEU_LEN];
2929 mlxsw_reg_raleu_pack(raleu_pl,
2930 (enum mlxsw_reg_ralxx_protocol) fib->proto,
2931 fib->vr->id, adj_index, ecmp_size, new_adj_index,
2933 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
2936 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
2937 struct mlxsw_sp_nexthop_group *nh_grp,
2938 u32 old_adj_index, u16 old_ecmp_size)
2940 struct mlxsw_sp_fib_entry *fib_entry;
2941 struct mlxsw_sp_fib *fib = NULL;
2944 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
2945 if (fib == fib_entry->fib_node->fib)
2947 fib = fib_entry->fib_node->fib;
2948 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
2959 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2960 struct mlxsw_sp_nexthop *nh)
2962 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2963 char ratr_pl[MLXSW_REG_RATR_LEN];
2965 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2966 true, MLXSW_REG_RATR_TYPE_ETHERNET,
2967 adj_index, neigh_entry->rif);
2968 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2969 if (nh->counter_valid)
2970 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2972 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2974 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2977 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2978 struct mlxsw_sp_nexthop *nh)
2982 for (i = 0; i < nh->num_adj_entries; i++) {
2985 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2993 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2995 struct mlxsw_sp_nexthop *nh)
2997 const struct mlxsw_sp_ipip_ops *ipip_ops;
2999 ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3000 return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3003 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3005 struct mlxsw_sp_nexthop *nh)
3009 for (i = 0; i < nh->num_adj_entries; i++) {
3012 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3022 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3023 struct mlxsw_sp_nexthop_group *nh_grp,
3026 u32 adj_index = nh_grp->adj_index; /* base */
3027 struct mlxsw_sp_nexthop *nh;
3031 for (i = 0; i < nh_grp->count; i++) {
3032 nh = &nh_grp->nexthops[i];
3034 if (!nh->should_offload) {
3039 if (nh->update || reallocate) {
3041 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3042 err = mlxsw_sp_nexthop_update
3043 (mlxsw_sp, adj_index, nh);
3045 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3046 err = mlxsw_sp_nexthop_ipip_update
3047 (mlxsw_sp, adj_index, nh);
3055 adj_index += nh->num_adj_entries;
3061 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3062 const struct mlxsw_sp_fib_entry *fib_entry);
3065 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3066 struct mlxsw_sp_nexthop_group *nh_grp)
3068 struct mlxsw_sp_fib_entry *fib_entry;
3071 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3072 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3075 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3083 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3084 enum mlxsw_reg_ralue_op op, int err);
3087 mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3089 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3090 struct mlxsw_sp_fib_entry *fib_entry;
3092 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3093 if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3096 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3100 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3102 /* Valid sizes for an adjacency group are:
3103 * 1-64, 512, 1024, 2048 and 4096.
3105 if (*p_adj_grp_size <= 64)
3107 else if (*p_adj_grp_size <= 512)
3108 *p_adj_grp_size = 512;
3109 else if (*p_adj_grp_size <= 1024)
3110 *p_adj_grp_size = 1024;
3111 else if (*p_adj_grp_size <= 2048)
3112 *p_adj_grp_size = 2048;
3114 *p_adj_grp_size = 4096;
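/* For illustration: a requested group size of 3 already falls in the
 * valid 1-64 range and is left unchanged, while a requested size of 70
 * is rounded up to 512, the next valid adjacency group size supported
 * by the device.
 */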
3117 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3118 unsigned int alloc_size)
3120 if (alloc_size >= 4096)
3121 *p_adj_grp_size = 4096;
3122 else if (alloc_size >= 2048)
3123 *p_adj_grp_size = 2048;
3124 else if (alloc_size >= 1024)
3125 *p_adj_grp_size = 1024;
3126 else if (alloc_size >= 512)
3127 *p_adj_grp_size = 512;
3130 static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3131 u16 *p_adj_grp_size)
3133 unsigned int alloc_size;
3136 /* Round up the requested group size to the next size supported
3137 * by the device and make sure the request can be satisfied.
3139 mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3140 err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3141 MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3142 *p_adj_grp_size, &alloc_size);
3145 /* It is possible the allocation results in more allocated
3146 * entries than requested. Try to use as many of them as possible.
3149 mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
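/* For illustration: if 512 entries are requested but the KVD linear
 * allocator reports it would actually hand out 1024, the group size is
 * adjusted to 1024 so that all of the allocated entries are used.
 */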
3155 mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3157 int i, g = 0, sum_norm_weight = 0;
3158 struct mlxsw_sp_nexthop *nh;
3160 for (i = 0; i < nh_grp->count; i++) {
3161 nh = &nh_grp->nexthops[i];
3163 if (!nh->should_offload)
3166 g = gcd(nh->nh_weight, g);
3171 for (i = 0; i < nh_grp->count; i++) {
3172 nh = &nh_grp->nexthops[i];
3174 if (!nh->should_offload)
3176 nh->norm_nh_weight = nh->nh_weight / g;
3177 sum_norm_weight += nh->norm_nh_weight;
3180 nh_grp->sum_norm_weight = sum_norm_weight;
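/* For illustration: nexthop weights of 2 and 4 have a GCD of 2, so their
 * normalized weights become 1 and 2 and sum_norm_weight ends up as 3.
 */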
3184 mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3186 int total = nh_grp->sum_norm_weight;
3187 u16 ecmp_size = nh_grp->ecmp_size;
3188 int i, weight = 0, lower_bound = 0;
3190 for (i = 0; i < nh_grp->count; i++) {
3191 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3194 if (!nh->should_offload)
3196 weight += nh->norm_nh_weight;
3197 upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3198 nh->num_adj_entries = upper_bound - lower_bound;
3199 lower_bound = upper_bound;
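/* For illustration: with normalized weights 1 and 2 (sum 3) and an ECMP
 * size of 3, the first nexthop receives DIV_ROUND_CLOSEST(3 * 1, 3) = 1
 * adjacency entry and the second receives 3 - 1 = 2 entries.
 */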
3204 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3205 struct mlxsw_sp_nexthop_group *nh_grp)
3207 u16 ecmp_size, old_ecmp_size;
3208 struct mlxsw_sp_nexthop *nh;
3209 bool offload_change = false;
3211 bool old_adj_index_valid;
3216 if (!nh_grp->gateway) {
3217 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3221 for (i = 0; i < nh_grp->count; i++) {
3222 nh = &nh_grp->nexthops[i];
3224 if (nh->should_offload != nh->offloaded) {
3225 offload_change = true;
3226 if (nh->should_offload)
3230 if (!offload_change) {
3231 /* Nothing was added or removed, so no need to reallocate. Just
3232 * update MAC on existing adjacency indexes.
3234 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3236 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3241 mlxsw_sp_nexthop_group_normalize(nh_grp);
3242 if (!nh_grp->sum_norm_weight)
3243 /* No neigh of this group is connected, so we just set
3244 * the trap and let everything flow through the kernel.
3248 ecmp_size = nh_grp->sum_norm_weight;
3249 err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3251 /* No valid allocation size available. */
3254 err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3255 ecmp_size, &adj_index);
3257 /* We ran out of KVD linear space, just set the
3258 * trap and let everything flow through the kernel.
3260 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3263 old_adj_index_valid = nh_grp->adj_index_valid;
3264 old_adj_index = nh_grp->adj_index;
3265 old_ecmp_size = nh_grp->ecmp_size;
3266 nh_grp->adj_index_valid = 1;
3267 nh_grp->adj_index = adj_index;
3268 nh_grp->ecmp_size = ecmp_size;
3269 mlxsw_sp_nexthop_group_rebalance(nh_grp);
3270 err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3272 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3276 if (!old_adj_index_valid) {
3277 /* The trap was set for fib entries, so we have to call
3278 * fib entry update to unset it and use adjacency index.
3280 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3282 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3288 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3289 old_adj_index, old_ecmp_size);
3290 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3291 old_ecmp_size, old_adj_index);
3293 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3297 /* Offload state within the group changed, so update the flags. */
3298 mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3303 old_adj_index_valid = nh_grp->adj_index_valid;
3304 nh_grp->adj_index_valid = 0;
3305 for (i = 0; i < nh_grp->count; i++) {
3306 nh = &nh_grp->nexthops[i];
3309 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3311 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3312 if (old_adj_index_valid)
3313 mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3314 nh_grp->ecmp_size, nh_grp->adj_index);
3317 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3321 nh->should_offload = 1;
3323 nh->should_offload = 0;
3328 mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3329 struct mlxsw_sp_neigh_entry *neigh_entry)
3331 struct neighbour *n, *old_n = neigh_entry->key.n;
3332 struct mlxsw_sp_nexthop *nh;
3333 bool entry_connected;
3337 nh = list_first_entry(&neigh_entry->nexthop_list,
3338 struct mlxsw_sp_nexthop, neigh_list_node);
3340 n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3342 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3346 neigh_event_send(n, NULL);
3349 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3350 neigh_entry->key.n = n;
3351 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3353 goto err_neigh_entry_insert;
3355 read_lock_bh(&n->lock);
3356 nud_state = n->nud_state;
3358 read_unlock_bh(&n->lock);
3359 entry_connected = nud_state & NUD_VALID && !dead;
3361 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3363 neigh_release(old_n);
3365 __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3366 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3373 err_neigh_entry_insert:
3374 neigh_entry->key.n = old_n;
3375 mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3381 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3382 struct mlxsw_sp_neigh_entry *neigh_entry,
3383 bool removing, bool dead)
3385 struct mlxsw_sp_nexthop *nh;
3387 if (list_empty(&neigh_entry->nexthop_list))
3393 err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3396 dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3400 list_for_each_entry(nh, &neigh_entry->nexthop_list,
3402 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3403 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3407 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3408 struct mlxsw_sp_rif *rif)
3414 list_add(&nh->rif_list_node, &rif->nexthop_list);
3417 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3422 list_del(&nh->rif_list_node);
3426 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3427 struct mlxsw_sp_nexthop *nh)
3429 struct mlxsw_sp_neigh_entry *neigh_entry;
3430 struct neighbour *n;
3434 if (!nh->nh_grp->gateway || nh->neigh_entry)
3437 /* Take a reference on the neighbour here, ensuring that it is
3438 * not destroyed before the nexthop entry is finished with it.
3439 * The reference is taken either in neigh_lookup() or
3440 * in neigh_create() in case n is not found.
3442 n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3444 n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3448 neigh_event_send(n, NULL);
3450 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3452 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3453 if (IS_ERR(neigh_entry)) {
3455 goto err_neigh_entry_create;
3459 /* If this is the first nexthop connected to that neigh, add it to
3460 * nexthop_neighs_list.
3462 if (list_empty(&neigh_entry->nexthop_list))
3463 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3464 &mlxsw_sp->router->nexthop_neighs_list);
3466 nh->neigh_entry = neigh_entry;
3467 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3468 read_lock_bh(&n->lock);
3469 nud_state = n->nud_state;
3471 read_unlock_bh(&n->lock);
3472 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3476 err_neigh_entry_create:
3481 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3482 struct mlxsw_sp_nexthop *nh)
3484 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3485 struct neighbour *n;
3489 n = neigh_entry->key.n;
3491 __mlxsw_sp_nexthop_neigh_update(nh, true);
3492 list_del(&nh->neigh_list_node);
3493 nh->neigh_entry = NULL;
3495 /* If this is the last nexthop connected to that neigh, remove it from
3496 * nexthop_neighs_list.
3498 if (list_empty(&neigh_entry->nexthop_list))
3499 list_del(&neigh_entry->nexthop_neighs_list_node);
3501 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3502 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3507 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3509 struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3511 return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3514 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3515 struct mlxsw_sp_nexthop *nh,
3516 struct mlxsw_sp_ipip_entry *ipip_entry)
3520 if (!nh->nh_grp->gateway || nh->ipip_entry)
3523 nh->ipip_entry = ipip_entry;
3524 removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3525 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3526 mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3529 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3530 struct mlxsw_sp_nexthop *nh)
3532 struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3537 __mlxsw_sp_nexthop_neigh_update(nh, true);
3538 nh->ipip_entry = NULL;
3541 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3542 const struct fib_nh *fib_nh,
3543 enum mlxsw_sp_ipip_type *p_ipipt)
3545 struct net_device *dev = fib_nh->nh_dev;
3548 fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3549 mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3552 static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3553 struct mlxsw_sp_nexthop *nh)
3556 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3557 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3558 mlxsw_sp_nexthop_rif_fini(nh);
3560 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3561 mlxsw_sp_nexthop_rif_fini(nh);
3562 mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3567 static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3568 struct mlxsw_sp_nexthop *nh,
3569 struct fib_nh *fib_nh)
3571 const struct mlxsw_sp_ipip_ops *ipip_ops;
3572 struct net_device *dev = fib_nh->nh_dev;
3573 struct mlxsw_sp_ipip_entry *ipip_entry;
3574 struct mlxsw_sp_rif *rif;
3577 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3579 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3580 if (ipip_ops->can_offload(mlxsw_sp, dev,
3581 MLXSW_SP_L3_PROTO_IPV4)) {
3582 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3583 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3588 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3589 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3593 mlxsw_sp_nexthop_rif_init(nh, rif);
3594 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3596 goto err_neigh_init;
3601 mlxsw_sp_nexthop_rif_fini(nh);
3605 static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3606 struct mlxsw_sp_nexthop *nh)
3608 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3611 static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3612 struct mlxsw_sp_nexthop_group *nh_grp,
3613 struct mlxsw_sp_nexthop *nh,
3614 struct fib_nh *fib_nh)
3616 struct net_device *dev = fib_nh->nh_dev;
3617 struct in_device *in_dev;
3620 nh->nh_grp = nh_grp;
3621 nh->key.fib_nh = fib_nh;
3622 #ifdef CONFIG_IP_ROUTE_MULTIPATH
3623 nh->nh_weight = fib_nh->nh_weight;
3627 memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
3628 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3632 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3633 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3638 in_dev = __in_dev_get_rtnl(dev);
3639 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3640 fib_nh->nh_flags & RTNH_F_LINKDOWN)
3643 err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3645 goto err_nexthop_neigh_init;
3649 err_nexthop_neigh_init:
3650 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3654 static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3655 struct mlxsw_sp_nexthop *nh)
3657 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3658 list_del(&nh->router_list_node);
3659 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3660 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3663 static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3664 unsigned long event, struct fib_nh *fib_nh)
3666 struct mlxsw_sp_nexthop_key key;
3667 struct mlxsw_sp_nexthop *nh;
3669 if (mlxsw_sp->router->aborted)
3672 key.fib_nh = fib_nh;
3673 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3674 if (WARN_ON_ONCE(!nh))
3678 case FIB_EVENT_NH_ADD:
3679 mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3681 case FIB_EVENT_NH_DEL:
3682 mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3686 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3689 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3690 struct mlxsw_sp_rif *rif)
3692 struct mlxsw_sp_nexthop *nh;
3695 list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3697 case MLXSW_SP_NEXTHOP_TYPE_ETH:
3700 case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3701 removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3708 __mlxsw_sp_nexthop_neigh_update(nh, removing);
3709 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3713 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3714 struct mlxsw_sp_rif *old_rif,
3715 struct mlxsw_sp_rif *new_rif)
3717 struct mlxsw_sp_nexthop *nh;
3719 list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3720 list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3722 mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3725 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3726 struct mlxsw_sp_rif *rif)
3728 struct mlxsw_sp_nexthop *nh, *tmp;
3730 list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3731 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3732 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3736 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3737 const struct fib_info *fi)
3739 return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3740 mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3743 static struct mlxsw_sp_nexthop_group *
3744 mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3746 struct mlxsw_sp_nexthop_group *nh_grp;
3747 struct mlxsw_sp_nexthop *nh;
3748 struct fib_nh *fib_nh;
3753 alloc_size = sizeof(*nh_grp) +
3754 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
3755 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
3757 return ERR_PTR(-ENOMEM);
3759 INIT_LIST_HEAD(&nh_grp->fib_list);
3760 nh_grp->neigh_tbl = &arp_tbl;
3762 nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3763 nh_grp->count = fi->fib_nhs;
3765 for (i = 0; i < nh_grp->count; i++) {
3766 nh = &nh_grp->nexthops[i];
3767 fib_nh = &fi->fib_nh[i];
3768 err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3770 goto err_nexthop4_init;
3772 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3774 goto err_nexthop_group_insert;
3775 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3778 err_nexthop_group_insert:
3780 for (i--; i >= 0; i--) {
3781 nh = &nh_grp->nexthops[i];
3782 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3786 return ERR_PTR(err);
3790 mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3791 struct mlxsw_sp_nexthop_group *nh_grp)
3793 struct mlxsw_sp_nexthop *nh;
3796 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3797 for (i = 0; i < nh_grp->count; i++) {
3798 nh = &nh_grp->nexthops[i];
3799 mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3801 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3802 WARN_ON_ONCE(nh_grp->adj_index_valid);
3803 fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3807 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3808 struct mlxsw_sp_fib_entry *fib_entry,
3809 struct fib_info *fi)
3811 struct mlxsw_sp_nexthop_group *nh_grp;
3813 nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3815 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3817 return PTR_ERR(nh_grp);
3819 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3820 fib_entry->nh_group = nh_grp;
3824 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3825 struct mlxsw_sp_fib_entry *fib_entry)
3827 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3829 list_del(&fib_entry->nexthop_group_node);
3830 if (!list_empty(&nh_grp->fib_list))
3832 mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3836 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3838 struct mlxsw_sp_fib4_entry *fib4_entry;
3840 fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3842 return !fib4_entry->tos;
3846 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3848 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3850 switch (fib_entry->fib_node->fib->proto) {
3851 case MLXSW_SP_L3_PROTO_IPV4:
3852 if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
3855 case MLXSW_SP_L3_PROTO_IPV6:
3859 switch (fib_entry->type) {
3860 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
3861 return !!nh_group->adj_index_valid;
3862 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
3863 return !!nh_group->nh_rif;
3864 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
3871 static struct mlxsw_sp_nexthop *
3872 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3873 const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3877 for (i = 0; i < nh_grp->count; i++) {
3878 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3879 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3881 if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3882 ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3883 &rt->fib6_nh.nh_gw))
3892 mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3894 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3897 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
3898 fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
3899 nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3903 for (i = 0; i < nh_grp->count; i++) {
3904 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3907 nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
3909 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3914 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3916 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3919 if (!list_is_singular(&nh_grp->fib_list))
3922 for (i = 0; i < nh_grp->count; i++) {
3923 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3925 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3930 mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3932 struct mlxsw_sp_fib6_entry *fib6_entry;
3933 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3935 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3938 if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
3939 list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
3940 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3944 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3945 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3946 struct mlxsw_sp_nexthop *nh;
3948 nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3949 if (nh && nh->offloaded)
3950 mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
3952 mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3957 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3959 struct mlxsw_sp_fib6_entry *fib6_entry;
3960 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3962 fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3964 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3965 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3967 rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3971 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3973 switch (fib_entry->fib_node->fib->proto) {
3974 case MLXSW_SP_L3_PROTO_IPV4:
3975 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3977 case MLXSW_SP_L3_PROTO_IPV6:
3978 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3984 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3986 switch (fib_entry->fib_node->fib->proto) {
3987 case MLXSW_SP_L3_PROTO_IPV4:
3988 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3990 case MLXSW_SP_L3_PROTO_IPV6:
3991 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3997 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3998 enum mlxsw_reg_ralue_op op, int err)
4001 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4002 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4003 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4006 if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4007 mlxsw_sp_fib_entry_offload_set(fib_entry);
4009 mlxsw_sp_fib_entry_offload_unset(fib_entry);
4017 mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4018 const struct mlxsw_sp_fib_entry *fib_entry,
4019 enum mlxsw_reg_ralue_op op)
4021 struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4022 enum mlxsw_reg_ralxx_protocol proto;
4025 proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4027 switch (fib->proto) {
4028 case MLXSW_SP_L3_PROTO_IPV4:
4029 p_dip = (u32 *) fib_entry->fib_node->key.addr;
4030 mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4031 fib_entry->fib_node->key.prefix_len,
4034 case MLXSW_SP_L3_PROTO_IPV6:
4035 mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4036 fib_entry->fib_node->key.prefix_len,
4037 fib_entry->fib_node->key.addr);
4042 static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4043 struct mlxsw_sp_fib_entry *fib_entry,
4044 enum mlxsw_reg_ralue_op op)
4046 char ralue_pl[MLXSW_REG_RALUE_LEN];
4047 enum mlxsw_reg_ralue_trap_action trap_action;
4049 u32 adjacency_index = 0;
4052 /* In case the nexthop group adjacency index is valid, use it
4053 * with the provided ECMP size. Otherwise, set up a trap and pass
4054 * the traffic to the kernel.
4056 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4057 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4058 adjacency_index = fib_entry->nh_group->adj_index;
4059 ecmp_size = fib_entry->nh_group->ecmp_size;
4061 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4062 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4065 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4066 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4067 adjacency_index, ecmp_size);
4068 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
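/* For illustration: a route whose nexthop group has a valid adjacency
 * index is written with a NOP trap action pointing at that index, while
 * a route whose nexthops are all unresolved is written with a TRAP
 * action towards MLXSW_TRAP_ID_RTR_INGRESS0, so the kernel keeps
 * forwarding its traffic.
 */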
4071 static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4072 struct mlxsw_sp_fib_entry *fib_entry,
4073 enum mlxsw_reg_ralue_op op)
4075 struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4076 enum mlxsw_reg_ralue_trap_action trap_action;
4077 char ralue_pl[MLXSW_REG_RALUE_LEN];
4081 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4082 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4083 rif_index = rif->rif_index;
4085 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4086 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4089 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4090 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4092 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4095 static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4096 struct mlxsw_sp_fib_entry *fib_entry,
4097 enum mlxsw_reg_ralue_op op)
4099 char ralue_pl[MLXSW_REG_RALUE_LEN];
4101 mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4102 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4103 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4107 mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4108 struct mlxsw_sp_fib_entry *fib_entry,
4109 enum mlxsw_reg_ralue_op op)
4111 struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4112 const struct mlxsw_sp_ipip_ops *ipip_ops;
4114 if (WARN_ON(!ipip_entry))
4117 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4118 return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4119 fib_entry->decap.tunnel_index);
4122 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4123 struct mlxsw_sp_fib_entry *fib_entry,
4124 enum mlxsw_reg_ralue_op op)
4126 switch (fib_entry->type) {
4127 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4128 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4129 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4130 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4131 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4132 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4133 case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4134 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4140 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4141 struct mlxsw_sp_fib_entry *fib_entry,
4142 enum mlxsw_reg_ralue_op op)
4144 int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4146 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4151 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4152 struct mlxsw_sp_fib_entry *fib_entry)
4154 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4155 MLXSW_REG_RALUE_OP_WRITE_WRITE);
4158 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4159 struct mlxsw_sp_fib_entry *fib_entry)
4161 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4162 MLXSW_REG_RALUE_OP_WRITE_DELETE);
4166 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4167 const struct fib_entry_notifier_info *fen_info,
4168 struct mlxsw_sp_fib_entry *fib_entry)
4170 union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4171 struct net_device *dev = fen_info->fi->fib_dev;
4172 struct mlxsw_sp_ipip_entry *ipip_entry;
4173 struct fib_info *fi = fen_info->fi;
4175 switch (fen_info->type) {
4177 ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4178 MLXSW_SP_L3_PROTO_IPV4, dip);
4179 if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4180 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4181 return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4187 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4189 case RTN_UNREACHABLE: /* fall through */
4190 case RTN_BLACKHOLE: /* fall through */
4192 /* Packets hitting these routes need to be trapped, but
4193 * can do so with a lower priority than packets directed
4194 * at the host, so use action type local instead of trap.
4196 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4199 if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4200 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4202 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4209 static struct mlxsw_sp_fib4_entry *
4210 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4211 struct mlxsw_sp_fib_node *fib_node,
4212 const struct fib_entry_notifier_info *fen_info)
4214 struct mlxsw_sp_fib4_entry *fib4_entry;
4215 struct mlxsw_sp_fib_entry *fib_entry;
4218 fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4220 return ERR_PTR(-ENOMEM);
4221 fib_entry = &fib4_entry->common;
4223 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4225 goto err_fib4_entry_type_set;
4227 err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4229 goto err_nexthop4_group_get;
4231 fib4_entry->prio = fen_info->fi->fib_priority;
4232 fib4_entry->tb_id = fen_info->tb_id;
4233 fib4_entry->type = fen_info->type;
4234 fib4_entry->tos = fen_info->tos;
4236 fib_entry->fib_node = fib_node;
4240 err_nexthop4_group_get:
4241 err_fib4_entry_type_set:
4243 return ERR_PTR(err);
4246 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4247 struct mlxsw_sp_fib4_entry *fib4_entry)
4249 mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4253 static struct mlxsw_sp_fib4_entry *
4254 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4255 const struct fib_entry_notifier_info *fen_info)
4257 struct mlxsw_sp_fib4_entry *fib4_entry;
4258 struct mlxsw_sp_fib_node *fib_node;
4259 struct mlxsw_sp_fib *fib;
4260 struct mlxsw_sp_vr *vr;
4262 vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4265 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4267 fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4268 sizeof(fen_info->dst),
4273 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4274 if (fib4_entry->tb_id == fen_info->tb_id &&
4275 fib4_entry->tos == fen_info->tos &&
4276 fib4_entry->type == fen_info->type &&
4277 mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4286 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4287 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4288 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4289 .key_len = sizeof(struct mlxsw_sp_fib_key),
4290 .automatic_shrinking = true,
4293 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4294 struct mlxsw_sp_fib_node *fib_node)
4296 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4297 mlxsw_sp_fib_ht_params);
4300 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4301 struct mlxsw_sp_fib_node *fib_node)
4303 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4304 mlxsw_sp_fib_ht_params);
4307 static struct mlxsw_sp_fib_node *
4308 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4309 size_t addr_len, unsigned char prefix_len)
4311 struct mlxsw_sp_fib_key key;
4313 memset(&key, 0, sizeof(key));
4314 memcpy(key.addr, addr, addr_len);
4315 key.prefix_len = prefix_len;
4316 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4319 static struct mlxsw_sp_fib_node *
4320 mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4321 size_t addr_len, unsigned char prefix_len)
4323 struct mlxsw_sp_fib_node *fib_node;
4325 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4329 INIT_LIST_HEAD(&fib_node->entry_list);
4330 list_add(&fib_node->list, &fib->node_list);
4331 memcpy(fib_node->key.addr, addr, addr_len);
4332 fib_node->key.prefix_len = prefix_len;
4337 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4339 list_del(&fib_node->list);
4340 WARN_ON(!list_empty(&fib_node->entry_list));
4345 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4346 const struct mlxsw_sp_fib_entry *fib_entry)
4348 return list_first_entry(&fib_node->entry_list,
4349 struct mlxsw_sp_fib_entry, list) == fib_entry;
4352 static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4353 struct mlxsw_sp_fib_node *fib_node)
4355 struct mlxsw_sp_prefix_usage req_prefix_usage;
4356 struct mlxsw_sp_fib *fib = fib_node->fib;
4357 struct mlxsw_sp_lpm_tree *lpm_tree;
4360 lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4361 if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4364 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4365 mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4366 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4368 if (IS_ERR(lpm_tree))
4369 return PTR_ERR(lpm_tree);
4371 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4373 goto err_lpm_tree_replace;
4376 lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4379 err_lpm_tree_replace:
4380 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4384 static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4385 struct mlxsw_sp_fib_node *fib_node)
4387 struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4388 struct mlxsw_sp_prefix_usage req_prefix_usage;
4389 struct mlxsw_sp_fib *fib = fib_node->fib;
4392 if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4394 /* Try to construct a new LPM tree from the current prefix usage
4395 * minus the unused one. If we fail, continue using the old one.
4397 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4398 mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4399 fib_node->key.prefix_len);
4400 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4402 if (IS_ERR(lpm_tree))
4405 err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4407 goto err_lpm_tree_replace;
4411 err_lpm_tree_replace:
4412 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4415 static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4416 struct mlxsw_sp_fib_node *fib_node,
4417 struct mlxsw_sp_fib *fib)
4421 err = mlxsw_sp_fib_node_insert(fib, fib_node);
4424 fib_node->fib = fib;
4426 err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4428 goto err_fib_lpm_tree_link;
4432 err_fib_lpm_tree_link:
4433 fib_node->fib = NULL;
4434 mlxsw_sp_fib_node_remove(fib, fib_node);
4438 static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4439 struct mlxsw_sp_fib_node *fib_node)
4441 struct mlxsw_sp_fib *fib = fib_node->fib;
4443 mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4444 fib_node->fib = NULL;
4445 mlxsw_sp_fib_node_remove(fib, fib_node);
4448 static struct mlxsw_sp_fib_node *
4449 mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4450 size_t addr_len, unsigned char prefix_len,
4451 enum mlxsw_sp_l3proto proto)
4453 struct mlxsw_sp_fib_node *fib_node;
4454 struct mlxsw_sp_fib *fib;
4455 struct mlxsw_sp_vr *vr;
4458 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4460 return ERR_CAST(vr);
4461 fib = mlxsw_sp_vr_fib(vr, proto);
4463 fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4467 fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4470 goto err_fib_node_create;
4473 err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4475 goto err_fib_node_init;
4480 mlxsw_sp_fib_node_destroy(fib_node);
4481 err_fib_node_create:
4482 mlxsw_sp_vr_put(mlxsw_sp, vr);
4483 return ERR_PTR(err);
4486 static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4487 struct mlxsw_sp_fib_node *fib_node)
4489 struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4491 if (!list_empty(&fib_node->entry_list))
4493 mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4494 mlxsw_sp_fib_node_destroy(fib_node);
4495 mlxsw_sp_vr_put(mlxsw_sp, vr);
4498 static struct mlxsw_sp_fib4_entry *
4499 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4500 const struct mlxsw_sp_fib4_entry *new4_entry)
4502 struct mlxsw_sp_fib4_entry *fib4_entry;
4504 list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4505 if (fib4_entry->tb_id > new4_entry->tb_id)
4507 if (fib4_entry->tb_id != new4_entry->tb_id)
4509 if (fib4_entry->tos > new4_entry->tos)
4511 if (fib4_entry->prio >= new4_entry->prio ||
4512 fib4_entry->tos < new4_entry->tos)
4520 mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4521 struct mlxsw_sp_fib4_entry *new4_entry)
4523 struct mlxsw_sp_fib_node *fib_node;
4525 if (WARN_ON(!fib4_entry))
4528 fib_node = fib4_entry->common.fib_node;
4529 list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4531 if (fib4_entry->tb_id != new4_entry->tb_id ||
4532 fib4_entry->tos != new4_entry->tos ||
4533 fib4_entry->prio != new4_entry->prio)
4537 list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4542 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4543 bool replace, bool append)
4545 struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4546 struct mlxsw_sp_fib4_entry *fib4_entry;
4548 fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4551 return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4552 if (replace && WARN_ON(!fib4_entry))
4555 /* Insert the new entry before the replaced one, so that we can
4556 * later remove the latter.
4559 list_add_tail(&new4_entry->common.list,
4560 &fib4_entry->common.list);
4562 struct mlxsw_sp_fib4_entry *last;
4564 list_for_each_entry(last, &fib_node->entry_list, common.list) {
4565 if (new4_entry->tb_id > last->tb_id)
4571 list_add(&new4_entry->common.list,
4572 &fib4_entry->common.list);
4574 list_add(&new4_entry->common.list,
4575 &fib_node->entry_list);
4582 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4584 list_del(&fib4_entry->common.list);
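/* Only the first entry in a FIB node's list is programmed to the device,
 * since the hardware holds a single route per prefix. When a new entry
 * becomes the first one, it overwrites the LPM entry of the previously
 * offloaded entry, whose offload indication is refreshed accordingly.
 */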
4587 static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4588 struct mlxsw_sp_fib_entry *fib_entry)
4590 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4592 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4595 /* To prevent packet loss, overwrite the previously offloaded entry. */
4598 if (!list_is_singular(&fib_node->entry_list)) {
4599 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4600 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4602 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4605 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4608 static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4609 struct mlxsw_sp_fib_entry *fib_entry)
4611 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4613 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4616 /* Promote the next entry by overwriting the deleted entry */
4617 if (!list_is_singular(&fib_node->entry_list)) {
4618 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4619 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4621 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4622 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4626 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4629 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4630 struct mlxsw_sp_fib4_entry *fib4_entry,
4631 bool replace, bool append)
4635 err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4639 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4641 goto err_fib_node_entry_add;
4645 err_fib_node_entry_add:
4646 mlxsw_sp_fib4_node_list_remove(fib4_entry);
4651 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4652 struct mlxsw_sp_fib4_entry *fib4_entry)
4654 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4655 mlxsw_sp_fib4_node_list_remove(fib4_entry);
4657 if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4658 mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4661 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4662 struct mlxsw_sp_fib4_entry *fib4_entry,
4665 struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4666 struct mlxsw_sp_fib4_entry *replaced;
4671 /* We inserted the new entry before the replaced one */
4672 replaced = list_next_entry(fib4_entry, common.list);
4674 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4675 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4676 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4680 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4681 const struct fib_entry_notifier_info *fen_info,
4682 bool replace, bool append)
4684 struct mlxsw_sp_fib4_entry *fib4_entry;
4685 struct mlxsw_sp_fib_node *fib_node;
4688 if (mlxsw_sp->router->aborted)
4691 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4692 &fen_info->dst, sizeof(fen_info->dst),
4694 MLXSW_SP_L3_PROTO_IPV4);
4695 if (IS_ERR(fib_node)) {
4696 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4697 return PTR_ERR(fib_node);
4700 fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4701 if (IS_ERR(fib4_entry)) {
4702 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4703 err = PTR_ERR(fib4_entry);
4704 goto err_fib4_entry_create;
4707 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4710 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4711 goto err_fib4_node_entry_link;
4714 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4718 err_fib4_node_entry_link:
4719 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4720 err_fib4_entry_create:
4721 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4725 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4726 struct fib_entry_notifier_info *fen_info)
4728 struct mlxsw_sp_fib4_entry *fib4_entry;
4729 struct mlxsw_sp_fib_node *fib_node;
4731 if (mlxsw_sp->router->aborted)
4734 fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4735 if (WARN_ON(!fib4_entry))
4737 fib_node = fib4_entry->common.fib_node;
4739 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4740 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4741 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4744 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4746 /* Packets with a link-local destination IP arriving at the router
4747 * are trapped to the CPU, so there is no need to program specific routes for them. */
4750 if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4753 /* Multicast routes aren't supported, so ignore them. Neighbour
4754 * Discovery packets are specifically trapped.
4756 if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4759 /* Cloned routes are irrelevant in the forwarding path. */
4760 if (rt->fib6_flags & RTF_CACHE)
4766 static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4768 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4770 mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4772 return ERR_PTR(-ENOMEM);
4774 /* In case of route replace, the replaced route is deleted with
4775 * no notification. Take a reference to prevent accessing freed memory. */
4778 mlxsw_sp_rt6->rt = rt;
4781 return mlxsw_sp_rt6;
4784 #if IS_ENABLED(CONFIG_IPV6)
4785 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4787 fib6_info_release(rt);
4790 static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4795 static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4797 mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4798 kfree(mlxsw_sp_rt6);
4801 static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4803 /* RTF_CACHE routes are ignored */
4804 return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
4807 static struct fib6_info *
4808 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
4810 return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4814 static struct mlxsw_sp_fib6_entry *
4815 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4816 const struct fib6_info *nrt, bool replace)
4818 struct mlxsw_sp_fib6_entry *fib6_entry;
4820 if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
4823 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
4824 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
4826 /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same virtual router. */
4829 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
4831 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
4833 if (rt->fib6_metric < nrt->fib6_metric)
4835 if (rt->fib6_metric == nrt->fib6_metric &&
4836 mlxsw_sp_fib6_rt_can_mp(rt))
4838 if (rt->fib6_metric > nrt->fib6_metric)
4845 static struct mlxsw_sp_rt6 *
4846 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4847 const struct fib6_info *rt)
4849 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4851 list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4852 if (mlxsw_sp_rt6->rt == rt)
4853 return mlxsw_sp_rt6;
4859 static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
4860 const struct fib6_info *rt,
4861 enum mlxsw_sp_ipip_type *ret)
4863 return rt->fib6_nh.nh_dev &&
4864 mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
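/* An IPv6 nexthop is either backed by an IP-in-IP tunnel (when its device
 * is an offloadable IP tunnel) or by an Ethernet RIF and a resolved
 * neighbour entry. The init helper below chooses between the two.
 */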
4867 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4868 struct mlxsw_sp_nexthop_group *nh_grp,
4869 struct mlxsw_sp_nexthop *nh,
4870 const struct fib6_info *rt)
4872 const struct mlxsw_sp_ipip_ops *ipip_ops;
4873 struct mlxsw_sp_ipip_entry *ipip_entry;
4874 struct net_device *dev = rt->fib6_nh.nh_dev;
4875 struct mlxsw_sp_rif *rif;
4878 ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4880 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4881 if (ipip_ops->can_offload(mlxsw_sp, dev,
4882 MLXSW_SP_L3_PROTO_IPV6)) {
4883 nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4884 mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4889 nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4890 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4893 mlxsw_sp_nexthop_rif_init(nh, rif);
4895 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4897 goto err_nexthop_neigh_init;
4901 err_nexthop_neigh_init:
4902 mlxsw_sp_nexthop_rif_fini(nh);
4906 static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
4907 struct mlxsw_sp_nexthop *nh)
4909 mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
4912 static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
4913 struct mlxsw_sp_nexthop_group *nh_grp,
4914 struct mlxsw_sp_nexthop *nh,
4915 const struct fib6_info *rt)
4917 struct net_device *dev = rt->fib6_nh.nh_dev;
4919 nh->nh_grp = nh_grp;
4920 nh->nh_weight = rt->fib6_nh.nh_weight;
4921 memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
4922 mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
4924 list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
4928 nh->ifindex = dev->ifindex;
4930 return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
4933 static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
4934 struct mlxsw_sp_nexthop *nh)
4936 mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
4937 list_del(&nh->router_list_node);
4938 mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
4941 static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4942 const struct fib6_info *rt)
4944 return rt->fib6_flags & RTF_GATEWAY ||
4945 mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
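/* IPv6 multipath routes are modelled as one FIB entry with a nexthop
 * group holding one nexthop per sibling route (fib6_entry->nrt6). Groups
 * are kept in a hash table so that identical groups created for different
 * prefixes can be shared and their adjacency entries reused.
 */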
4948 static struct mlxsw_sp_nexthop_group *
4949 mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
4950 struct mlxsw_sp_fib6_entry *fib6_entry)
4952 struct mlxsw_sp_nexthop_group *nh_grp;
4953 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4954 struct mlxsw_sp_nexthop *nh;
4959 alloc_size = sizeof(*nh_grp) +
4960 fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
4961 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
4963 return ERR_PTR(-ENOMEM);
4964 INIT_LIST_HEAD(&nh_grp->fib_list);
4965 #if IS_ENABLED(CONFIG_IPV6)
4966 nh_grp->neigh_tbl = &nd_tbl;
4968 mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
4969 struct mlxsw_sp_rt6, list);
4970 nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
4971 nh_grp->count = fib6_entry->nrt6;
4972 for (i = 0; i < nh_grp->count; i++) {
4973 struct fib6_info *rt = mlxsw_sp_rt6->rt;
4975 nh = &nh_grp->nexthops[i];
4976 err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
4978 goto err_nexthop6_init;
4979 mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
4982 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4984 goto err_nexthop_group_insert;
4986 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4989 err_nexthop_group_insert:
4991 for (i--; i >= 0; i--) {
4992 nh = &nh_grp->nexthops[i];
4993 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
4996 return ERR_PTR(err);
5000 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5001 struct mlxsw_sp_nexthop_group *nh_grp)
5003 struct mlxsw_sp_nexthop *nh;
5004 int i = nh_grp->count;
5006 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5007 for (i--; i >= 0; i--) {
5008 nh = &nh_grp->nexthops[i];
5009 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5011 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5012 WARN_ON(nh_grp->adj_index_valid);
5016 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5017 struct mlxsw_sp_fib6_entry *fib6_entry)
5019 struct mlxsw_sp_nexthop_group *nh_grp;
5021 nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5023 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5025 return PTR_ERR(nh_grp);
5028 list_add_tail(&fib6_entry->common.nexthop_group_node,
5030 fib6_entry->common.nh_group = nh_grp;
5035 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5036 struct mlxsw_sp_fib_entry *fib_entry)
5038 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5040 list_del(&fib_entry->nexthop_group_node);
5041 if (!list_empty(&nh_grp->fib_list))
5043 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
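/* When sibling routes are appended to or deleted from an IPv6 entry, the
 * entry is moved to a matching nexthop group (creating one if needed) and
 * its LPM entry is re-written so that it points at the adjacency index of
 * the new group. The old group is destroyed if it is no longer used.
 */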
5047 mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5048 struct mlxsw_sp_fib6_entry *fib6_entry)
5050 struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5053 fib6_entry->common.nh_group = NULL;
5054 list_del(&fib6_entry->common.nexthop_group_node);
5056 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5058 goto err_nexthop6_group_get;
5060 /* In case this entry is offloaded, then the adjacency index
5061 * currently associated with it in the device's table is that
5062 * of the old group. Start using the new one instead.
5064 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5066 goto err_fib_node_entry_add;
5068 if (list_empty(&old_nh_grp->fib_list))
5069 mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5073 err_fib_node_entry_add:
5074 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5075 err_nexthop6_group_get:
5076 list_add_tail(&fib6_entry->common.nexthop_group_node,
5077 &old_nh_grp->fib_list);
5078 fib6_entry->common.nh_group = old_nh_grp;
5083 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5084 struct mlxsw_sp_fib6_entry *fib6_entry,
5085 struct fib6_info *rt)
5087 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5090 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5091 if (IS_ERR(mlxsw_sp_rt6))
5092 return PTR_ERR(mlxsw_sp_rt6);
5094 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5097 err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5099 goto err_nexthop6_group_update;
5103 err_nexthop6_group_update:
5105 list_del(&mlxsw_sp_rt6->list);
5106 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5111 mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5112 struct mlxsw_sp_fib6_entry *fib6_entry,
5113 struct fib6_info *rt)
5115 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5117 mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5118 if (WARN_ON(!mlxsw_sp_rt6))
5122 list_del(&mlxsw_sp_rt6->list);
5123 mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5124 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5127 static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5128 struct mlxsw_sp_fib_entry *fib_entry,
5129 const struct fib6_info *rt)
5131 /* Packets hitting RTF_REJECT routes need to be discarded by the
5132 * stack. We can rely on their destination device not having a
5133 * RIF (it's the loopback device) and can thus use action type
5134 * local, which will cause them to be trapped with a lower
5135 * priority than packets that need to be locally received.
5137 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5138 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5139 else if (rt->fib6_flags & RTF_REJECT)
5140 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5141 else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5142 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5144 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5148 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5150 struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5152 list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5155 list_del(&mlxsw_sp_rt6->list);
5156 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5160 static struct mlxsw_sp_fib6_entry *
5161 mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5162 struct mlxsw_sp_fib_node *fib_node,
5163 struct fib6_info *rt)
5165 struct mlxsw_sp_fib6_entry *fib6_entry;
5166 struct mlxsw_sp_fib_entry *fib_entry;
5167 struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5170 fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5172 return ERR_PTR(-ENOMEM);
5173 fib_entry = &fib6_entry->common;
5175 mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5176 if (IS_ERR(mlxsw_sp_rt6)) {
5177 err = PTR_ERR(mlxsw_sp_rt6);
5178 goto err_rt6_create;
5181 mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5183 INIT_LIST_HEAD(&fib6_entry->rt6_list);
5184 list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5185 fib6_entry->nrt6 = 1;
5186 err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5188 goto err_nexthop6_group_get;
5190 fib_entry->fib_node = fib_node;
5194 err_nexthop6_group_get:
5195 list_del(&mlxsw_sp_rt6->list);
5196 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5199 return ERR_PTR(err);
5202 static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5203 struct mlxsw_sp_fib6_entry *fib6_entry)
5205 mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5206 mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5207 WARN_ON(fib6_entry->nrt6);
5211 static struct mlxsw_sp_fib6_entry *
5212 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5213 const struct fib6_info *nrt, bool replace)
5215 struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5217 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5218 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5220 if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5222 if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5224 if (replace && rt->fib6_metric == nrt->fib6_metric) {
5225 if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5226 mlxsw_sp_fib6_rt_can_mp(nrt))
5228 if (mlxsw_sp_fib6_rt_can_mp(nrt))
5229 fallback = fallback ?: fib6_entry;
5231 if (rt->fib6_metric > nrt->fib6_metric)
5232 return fallback ?: fib6_entry;
5239 mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5242 struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5243 struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5244 struct mlxsw_sp_fib6_entry *fib6_entry;
5246 fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5248 if (replace && WARN_ON(!fib6_entry))
5252 list_add_tail(&new6_entry->common.list,
5253 &fib6_entry->common.list);
5255 struct mlxsw_sp_fib6_entry *last;
5257 list_for_each_entry(last, &fib_node->entry_list, common.list) {
5258 struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5260 if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5266 list_add(&new6_entry->common.list,
5267 &fib6_entry->common.list);
5269 list_add(&new6_entry->common.list,
5270 &fib_node->entry_list);
5277 mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5279 list_del(&fib6_entry->common.list);
5282 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5283 struct mlxsw_sp_fib6_entry *fib6_entry,
5288 err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5292 err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5294 goto err_fib_node_entry_add;
5298 err_fib_node_entry_add:
5299 mlxsw_sp_fib6_node_list_remove(fib6_entry);
5304 mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5305 struct mlxsw_sp_fib6_entry *fib6_entry)
5307 mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5308 mlxsw_sp_fib6_node_list_remove(fib6_entry);
5311 static struct mlxsw_sp_fib6_entry *
5312 mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5313 const struct fib6_info *rt)
5315 struct mlxsw_sp_fib6_entry *fib6_entry;
5316 struct mlxsw_sp_fib_node *fib_node;
5317 struct mlxsw_sp_fib *fib;
5318 struct mlxsw_sp_vr *vr;
5320 vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5323 fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5325 fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5326 sizeof(rt->fib6_dst.addr),
5331 list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5332 struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5334 if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5335 rt->fib6_metric == iter_rt->fib6_metric &&
5336 mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5343 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5344 struct mlxsw_sp_fib6_entry *fib6_entry,
5347 struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5348 struct mlxsw_sp_fib6_entry *replaced;
5353 replaced = list_next_entry(fib6_entry, common.list);
5355 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5356 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5357 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5360 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5361 struct fib6_info *rt, bool replace)
5363 struct mlxsw_sp_fib6_entry *fib6_entry;
5364 struct mlxsw_sp_fib_node *fib_node;
5367 if (mlxsw_sp->router->aborted)
5370 if (rt->fib6_src.plen)
5373 if (mlxsw_sp_fib6_rt_should_ignore(rt))
5376 fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5378 sizeof(rt->fib6_dst.addr),
5380 MLXSW_SP_L3_PROTO_IPV6);
5381 if (IS_ERR(fib_node))
5382 return PTR_ERR(fib_node);
5384 /* Before creating a new entry, try to append the route to an existing multipath entry. */
5387 fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5389 err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5391 goto err_fib6_entry_nexthop_add;
5395 fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5396 if (IS_ERR(fib6_entry)) {
5397 err = PTR_ERR(fib6_entry);
5398 goto err_fib6_entry_create;
5401 err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5403 goto err_fib6_node_entry_link;
5405 mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5409 err_fib6_node_entry_link:
5410 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5411 err_fib6_entry_create:
5412 err_fib6_entry_nexthop_add:
5413 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5417 static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5418 struct fib6_info *rt)
5420 struct mlxsw_sp_fib6_entry *fib6_entry;
5421 struct mlxsw_sp_fib_node *fib_node;
5423 if (mlxsw_sp->router->aborted)
5426 if (mlxsw_sp_fib6_rt_should_ignore(rt))
5429 fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5430 if (WARN_ON(!fib6_entry))
5433 /* If route is part of a multipath entry, but not the last one
5434 * removed, then only reduce its nexthop group.
5436 if (!list_is_singular(&fib6_entry->rt6_list)) {
5437 mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5441 fib_node = fib6_entry->common.fib_node;
5443 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5444 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5445 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
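/* When FIB offload is aborted, a minimal LPM tree is created and bound to
 * all virtual routers, and a default (zero-length prefix) route with an
 * IP2ME action is programmed in each of them, so that all routed packets
 * are trapped to the CPU and forwarded by the kernel instead.
 */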
5448 static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5449 enum mlxsw_reg_ralxx_protocol proto,
5452 char ralta_pl[MLXSW_REG_RALTA_LEN];
5453 char ralst_pl[MLXSW_REG_RALST_LEN];
5456 mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5457 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5461 mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5462 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5466 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5467 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5468 char raltb_pl[MLXSW_REG_RALTB_LEN];
5469 char ralue_pl[MLXSW_REG_RALUE_LEN];
5471 mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5472 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5477 mlxsw_reg_ralue_pack(ralue_pl, proto,
5478 MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5479 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5480 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5489 static struct mlxsw_sp_mr_table *
5490 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5492 if (family == RTNL_FAMILY_IPMR)
5493 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5495 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5498 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5499 struct mfc_entry_notifier_info *men_info,
5502 struct mlxsw_sp_mr_table *mrt;
5503 struct mlxsw_sp_vr *vr;
5505 if (mlxsw_sp->router->aborted)
5508 vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5512 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5513 return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5516 static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5517 struct mfc_entry_notifier_info *men_info)
5519 struct mlxsw_sp_mr_table *mrt;
5520 struct mlxsw_sp_vr *vr;
5522 if (mlxsw_sp->router->aborted)
5525 vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5529 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5530 mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5531 mlxsw_sp_vr_put(mlxsw_sp, vr);
5535 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5536 struct vif_entry_notifier_info *ven_info)
5538 struct mlxsw_sp_mr_table *mrt;
5539 struct mlxsw_sp_rif *rif;
5540 struct mlxsw_sp_vr *vr;
5542 if (mlxsw_sp->router->aborted)
5545 vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5549 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5550 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5551 return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5552 ven_info->vif_index,
5553 ven_info->vif_flags, rif);
5557 mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5558 struct vif_entry_notifier_info *ven_info)
5560 struct mlxsw_sp_mr_table *mrt;
5561 struct mlxsw_sp_vr *vr;
5563 if (mlxsw_sp->router->aborted)
5566 vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5570 mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5571 mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5572 mlxsw_sp_vr_put(mlxsw_sp, vr);
5575 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5577 enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5580 err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5581 MLXSW_SP_LPM_TREE_MIN);
5585 /* The multicast router code does not need an abort trap as by default,
5586 * packets that don't match any routes are trapped to the CPU.
5589 proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5590 return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5591 MLXSW_SP_LPM_TREE_MIN + 1);
5594 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5595 struct mlxsw_sp_fib_node *fib_node)
5597 struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5599 list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5601 bool do_break = &tmp->common.list == &fib_node->entry_list;
5603 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5604 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5605 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5606 /* Break when the entry list is empty and the node was freed.
5607 * Otherwise, we'll access freed memory in the next iteration. */
5615 static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5616 struct mlxsw_sp_fib_node *fib_node)
5618 struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5620 list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5622 bool do_break = &tmp->common.list == &fib_node->entry_list;
5624 mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5625 mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5626 mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5632 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5633 struct mlxsw_sp_fib_node *fib_node)
5635 switch (fib_node->fib->proto) {
5636 case MLXSW_SP_L3_PROTO_IPV4:
5637 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5639 case MLXSW_SP_L3_PROTO_IPV6:
5640 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5645 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5646 struct mlxsw_sp_vr *vr,
5647 enum mlxsw_sp_l3proto proto)
5649 struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5650 struct mlxsw_sp_fib_node *fib_node, *tmp;
5652 list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5653 bool do_break = &tmp->list == &fib->node_list;
5655 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5661 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5665 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5666 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5668 if (!mlxsw_sp_vr_is_used(vr))
5671 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5672 mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5673 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5675 /* If the virtual router was only used for IPv4, then it's no longer used. */
5678 if (!mlxsw_sp_vr_is_used(vr))
5680 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5684 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5688 if (mlxsw_sp->router->aborted)
5690 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5691 mlxsw_sp_router_fib_flush(mlxsw_sp);
5692 mlxsw_sp->router->aborted = true;
5693 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5695 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
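/* FIB notifications are delivered in an atomic context, so the notifier
 * only copies the relevant info (taking references on objects that might
 * otherwise be freed) and defers the actual processing to a work item
 * that runs in process context.
 */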
5698 struct mlxsw_sp_fib_event_work {
5699 struct work_struct work;
5701 struct fib6_entry_notifier_info fen6_info;
5702 struct fib_entry_notifier_info fen_info;
5703 struct fib_rule_notifier_info fr_info;
5704 struct fib_nh_notifier_info fnh_info;
5705 struct mfc_entry_notifier_info men_info;
5706 struct vif_entry_notifier_info ven_info;
5708 struct mlxsw_sp *mlxsw_sp;
5709 unsigned long event;
5712 static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5714 struct mlxsw_sp_fib_event_work *fib_work =
5715 container_of(work, struct mlxsw_sp_fib_event_work, work);
5716 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5717 bool replace, append;
5720 /* Protect internal structures from changes */
5722 mlxsw_sp_span_respin(mlxsw_sp);
5724 switch (fib_work->event) {
5725 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5726 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5727 case FIB_EVENT_ENTRY_ADD:
5728 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5729 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5730 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5733 mlxsw_sp_router_fib_abort(mlxsw_sp);
5734 fib_info_put(fib_work->fen_info.fi);
5736 case FIB_EVENT_ENTRY_DEL:
5737 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5738 fib_info_put(fib_work->fen_info.fi);
5740 case FIB_EVENT_RULE_ADD:
5741 /* If we get here, a rule was added that we do not support,
5742 * so just trigger the FIB abort. */
5744 mlxsw_sp_router_fib_abort(mlxsw_sp);
5746 case FIB_EVENT_NH_ADD: /* fall through */
5747 case FIB_EVENT_NH_DEL:
5748 mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5749 fib_work->fnh_info.fib_nh);
5750 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5757 static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5759 struct mlxsw_sp_fib_event_work *fib_work =
5760 container_of(work, struct mlxsw_sp_fib_event_work, work);
5761 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5766 mlxsw_sp_span_respin(mlxsw_sp);
5768 switch (fib_work->event) {
5769 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5770 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5771 case FIB_EVENT_ENTRY_ADD:
5772 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5773 err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5774 fib_work->fen6_info.rt, replace);
5776 mlxsw_sp_router_fib_abort(mlxsw_sp);
5777 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5779 case FIB_EVENT_ENTRY_DEL:
5780 mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5781 mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5783 case FIB_EVENT_RULE_ADD:
5784 /* If we get here, a rule was added that we do not support,
5785 * so just trigger the FIB abort. */
5787 mlxsw_sp_router_fib_abort(mlxsw_sp);
5794 static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5796 struct mlxsw_sp_fib_event_work *fib_work =
5797 container_of(work, struct mlxsw_sp_fib_event_work, work);
5798 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5803 switch (fib_work->event) {
5804 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5805 case FIB_EVENT_ENTRY_ADD:
5806 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5808 err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
5811 mlxsw_sp_router_fib_abort(mlxsw_sp);
5812 mr_cache_put(fib_work->men_info.mfc);
5814 case FIB_EVENT_ENTRY_DEL:
5815 mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
5816 mr_cache_put(fib_work->men_info.mfc);
5818 case FIB_EVENT_VIF_ADD:
5819 err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
5820 &fib_work->ven_info);
5822 mlxsw_sp_router_fib_abort(mlxsw_sp);
5823 dev_put(fib_work->ven_info.dev);
5825 case FIB_EVENT_VIF_DEL:
5826 mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
5827 &fib_work->ven_info);
5828 dev_put(fib_work->ven_info.dev);
5830 case FIB_EVENT_RULE_ADD:
5831 /* If we get here, a rule was added that we do not support,
5832 * so just trigger the FIB abort. */
5834 mlxsw_sp_router_fib_abort(mlxsw_sp);
5841 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5842 struct fib_notifier_info *info)
5844 struct fib_entry_notifier_info *fen_info;
5845 struct fib_nh_notifier_info *fnh_info;
5847 switch (fib_work->event) {
5848 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5849 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5850 case FIB_EVENT_ENTRY_ADD: /* fall through */
5851 case FIB_EVENT_ENTRY_DEL:
5852 fen_info = container_of(info, struct fib_entry_notifier_info,
5854 fib_work->fen_info = *fen_info;
5855 /* Take reference on fib_info to prevent it from being
5856 * freed while work is queued. Release it afterwards.
5858 fib_info_hold(fib_work->fen_info.fi);
5860 case FIB_EVENT_NH_ADD: /* fall through */
5861 case FIB_EVENT_NH_DEL:
5862 fnh_info = container_of(info, struct fib_nh_notifier_info,
5864 fib_work->fnh_info = *fnh_info;
5865 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5870 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5871 struct fib_notifier_info *info)
5873 struct fib6_entry_notifier_info *fen6_info;
5875 switch (fib_work->event) {
5876 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5877 case FIB_EVENT_ENTRY_APPEND: /* fall through */
5878 case FIB_EVENT_ENTRY_ADD: /* fall through */
5879 case FIB_EVENT_ENTRY_DEL:
5880 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5882 fib_work->fen6_info = *fen6_info;
5883 fib6_info_hold(fib_work->fen6_info.rt);
5889 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5890 struct fib_notifier_info *info)
5892 switch (fib_work->event) {
5893 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5894 case FIB_EVENT_ENTRY_ADD: /* fall through */
5895 case FIB_EVENT_ENTRY_DEL:
5896 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5897 mr_cache_hold(fib_work->men_info.mfc);
5899 case FIB_EVENT_VIF_ADD: /* fall through */
5900 case FIB_EVENT_VIF_DEL:
5901 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5902 dev_hold(fib_work->ven_info.dev);
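/* Only default FIB rules and rules dispatching to an l3mdev (VRF) can be
 * reflected in the device. Any other rule is rejected with an extack
 * message and eventually causes FIB offload to be aborted.
 */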
5907 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5908 struct fib_notifier_info *info,
5909 struct mlxsw_sp *mlxsw_sp)
5911 struct netlink_ext_ack *extack = info->extack;
5912 struct fib_rule_notifier_info *fr_info;
5913 struct fib_rule *rule;
5916 /* nothing to do at the moment */
5917 if (event == FIB_EVENT_RULE_DEL)
5920 if (mlxsw_sp->router->aborted)
5923 fr_info = container_of(info, struct fib_rule_notifier_info, info);
5924 rule = fr_info->rule;
5926 switch (info->family) {
5928 if (!fib4_rule_default(rule) && !rule->l3mdev)
5932 if (!fib6_rule_default(rule) && !rule->l3mdev)
5935 case RTNL_FAMILY_IPMR:
5936 if (!ipmr_rule_default(rule) && !rule->l3mdev)
5939 case RTNL_FAMILY_IP6MR:
5940 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5946 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5951 /* Called with rcu_read_lock() */
5952 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
5953 unsigned long event, void *ptr)
5955 struct mlxsw_sp_fib_event_work *fib_work;
5956 struct fib_notifier_info *info = ptr;
5957 struct mlxsw_sp_router *router;
5960 if (!net_eq(info->net, &init_net) ||
5961 (info->family != AF_INET && info->family != AF_INET6 &&
5962 info->family != RTNL_FAMILY_IPMR &&
5963 info->family != RTNL_FAMILY_IP6MR))
5966 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
5969 case FIB_EVENT_RULE_ADD: /* fall through */
5970 case FIB_EVENT_RULE_DEL:
5971 err = mlxsw_sp_router_fib_rule_event(event, info,
5973 if (!err || info->extack)
5974 return notifier_from_errno(err);
5976 case FIB_EVENT_ENTRY_ADD:
5977 if (router->aborted) {
5978 NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
5979 return notifier_from_errno(-EINVAL);
5984 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
5988 fib_work->mlxsw_sp = router->mlxsw_sp;
5989 fib_work->event = event;
5991 switch (info->family) {
5993 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
5994 mlxsw_sp_router_fib4_event(fib_work, info);
5997 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
5998 mlxsw_sp_router_fib6_event(fib_work, info);
6000 case RTNL_FAMILY_IP6MR:
6001 case RTNL_FAMILY_IPMR:
6002 INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6003 mlxsw_sp_router_fibmr_event(fib_work, info);
6007 mlxsw_core_schedule_work(&fib_work->work);
6012 struct mlxsw_sp_rif *
6013 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6014 const struct net_device *dev)
6018 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6019 if (mlxsw_sp->router->rifs[i] &&
6020 mlxsw_sp->router->rifs[i]->dev == dev)
6021 return mlxsw_sp->router->rifs[i];
6026 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6028 char ritr_pl[MLXSW_REG_RITR_LEN];
6031 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6032 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6033 if (WARN_ON_ONCE(err))
6036 mlxsw_reg_ritr_enable_set(ritr_pl, false);
6037 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6040 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6041 struct mlxsw_sp_rif *rif)
6043 mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6044 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6045 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6049 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6050 unsigned long event)
6052 struct inet6_dev *inet6_dev;
6053 bool addr_list_empty = true;
6054 struct in_device *idev;
6060 idev = __in_dev_get_rtnl(dev);
6061 if (idev && idev->ifa_list)
6062 addr_list_empty = false;
6064 inet6_dev = __in6_dev_get(dev);
6065 if (addr_list_empty && inet6_dev &&
6066 !list_empty(&inet6_dev->addr_list))
6067 addr_list_empty = false;
6069 /* macvlans do not have a RIF, but rather piggyback on the
6070 * RIF of their lower device. */
6072 if (netif_is_macvlan(dev) && addr_list_empty)
6075 if (rif && addr_list_empty &&
6076 !netif_is_l3_slave(rif->dev))
6078 /* It is possible we already removed the RIF ourselves
6079 * if it was assigned to a netdev that is now a bridged port. */
6088 static enum mlxsw_sp_rif_type
6089 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6090 const struct net_device *dev)
6092 enum mlxsw_sp_fid_type type;
6094 if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6095 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6097 /* Otherwise RIF type is derived from the type of the underlying FID. */
6098 if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6099 type = MLXSW_SP_FID_TYPE_8021Q;
6100 else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6101 type = MLXSW_SP_FID_TYPE_8021Q;
6102 else if (netif_is_bridge_master(dev))
6103 type = MLXSW_SP_FID_TYPE_8021D;
6105 type = MLXSW_SP_FID_TYPE_RFID;
6107 return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6110 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6114 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6115 if (!mlxsw_sp->router->rifs[i]) {
6124 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6126 struct net_device *l3_dev)
6128 struct mlxsw_sp_rif *rif;
6130 rif = kzalloc(rif_size, GFP_KERNEL);
6134 INIT_LIST_HEAD(&rif->nexthop_list);
6135 INIT_LIST_HEAD(&rif->neigh_list);
6136 ether_addr_copy(rif->addr, l3_dev->dev_addr);
6137 rif->mtu = l3_dev->mtu;
6140 rif->rif_index = rif_index;
6145 struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6148 return mlxsw_sp->router->rifs[rif_index];
6151 u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6153 return rif->rif_index;
6156 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6158 return lb_rif->common.rif_index;
6161 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6163 return lb_rif->ul_vr_id;
6166 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6168 return rif->dev->ifindex;
6171 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6176 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
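/* RIF creation: derive the RIF type (and thus its ops) from the netdev,
 * get the virtual router of the device's FIB table, allocate a free RIF
 * index, bind a FID, let the type-specific ops configure the hardware and
 * finally register the RIF with the multicast routing tables.
 */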
6181 static struct mlxsw_sp_rif *
6182 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6183 const struct mlxsw_sp_rif_params *params,
6184 struct netlink_ext_ack *extack)
6186 u32 tb_id = l3mdev_fib_table(params->dev);
6187 const struct mlxsw_sp_rif_ops *ops;
6188 struct mlxsw_sp_fid *fid = NULL;
6189 enum mlxsw_sp_rif_type type;
6190 struct mlxsw_sp_rif *rif;
6191 struct mlxsw_sp_vr *vr;
6195 type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6196 ops = mlxsw_sp->router->rif_ops_arr[type];
6198 vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6200 return ERR_CAST(vr);
6203 err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6205 NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6206 goto err_rif_index_alloc;
6209 rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6214 rif->mlxsw_sp = mlxsw_sp;
6218 fid = ops->fid_get(rif, extack);
6227 ops->setup(rif, params);
6229 err = ops->configure(rif);
6233 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6234 err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6236 goto err_mr_rif_add;
6239 mlxsw_sp_rif_counters_alloc(rif);
6240 mlxsw_sp->router->rifs[rif_index] = rif;
6245 for (i--; i >= 0; i--)
6246 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6247 ops->deconfigure(rif);
6250 mlxsw_sp_fid_put(fid);
6254 err_rif_index_alloc:
6256 mlxsw_sp_vr_put(mlxsw_sp, vr);
6257 return ERR_PTR(err);
6260 void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6262 const struct mlxsw_sp_rif_ops *ops = rif->ops;
6263 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6264 struct mlxsw_sp_fid *fid = rif->fid;
6265 struct mlxsw_sp_vr *vr;
6268 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6269 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6271 mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6272 mlxsw_sp_rif_counters_free(rif);
6273 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6274 mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6275 ops->deconfigure(rif);
6277 /* Loopback RIFs are not associated with a FID. */
6278 mlxsw_sp_fid_put(fid);
6281 mlxsw_sp_vr_put(mlxsw_sp, vr);
6284 void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6285 struct net_device *dev)
6287 struct mlxsw_sp_rif *rif;
6289 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6292 mlxsw_sp_rif_destroy(rif);
6296 mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6297 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6299 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6301 params->vid = mlxsw_sp_port_vlan->vid;
6302 params->lag = mlxsw_sp_port->lagged;
6304 params->lag_id = mlxsw_sp_port->lag_id;
6306 params->system_port = mlxsw_sp_port->local_port;
6310 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6311 struct net_device *l3_dev,
6312 struct netlink_ext_ack *extack)
6314 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6315 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6316 u16 vid = mlxsw_sp_port_vlan->vid;
6317 struct mlxsw_sp_rif *rif;
6318 struct mlxsw_sp_fid *fid;
6321 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6323 struct mlxsw_sp_rif_params params = {
6327 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6328 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6330 return PTR_ERR(rif);
6333 /* FID was already created, just take a reference */
6334 fid = rif->ops->fid_get(rif, extack);
6335 err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6337 goto err_fid_port_vid_map;
6339 err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6341 goto err_port_vid_learning_set;
6343 err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6344 BR_STATE_FORWARDING);
6346 goto err_port_vid_stp_set;
6348 mlxsw_sp_port_vlan->fid = fid;
6352 err_port_vid_stp_set:
6353 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6354 err_port_vid_learning_set:
6355 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6356 err_fid_port_vid_map:
6357 mlxsw_sp_fid_put(fid);
6362 mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6364 struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6365 struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6366 u16 vid = mlxsw_sp_port_vlan->vid;
6368 if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6371 mlxsw_sp_port_vlan->fid = NULL;
6372 mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6373 mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6374 mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6375 /* If router port holds the last reference on the rFID, then the
6376 * associated Sub-port RIF will be destroyed.
6378 mlxsw_sp_fid_put(fid);
6381 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6382 struct net_device *port_dev,
6383 unsigned long event, u16 vid,
6384 struct netlink_ext_ack *extack)
6386 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6387 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6389 mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6390 if (WARN_ON(!mlxsw_sp_port_vlan))
6395 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6398 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6405 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6406 unsigned long event,
6407 struct netlink_ext_ack *extack)
6409 if (netif_is_bridge_port(port_dev) ||
6410 netif_is_lag_port(port_dev) ||
6411 netif_is_ovs_port(port_dev))
6414 return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6418 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6419 struct net_device *lag_dev,
6420 unsigned long event, u16 vid,
6421 struct netlink_ext_ack *extack)
6423 struct net_device *port_dev;
6424 struct list_head *iter;
6427 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6428 if (mlxsw_sp_port_dev_check(port_dev)) {
6429 err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6441 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6442 unsigned long event,
6443 struct netlink_ext_ack *extack)
6445 if (netif_is_bridge_port(lag_dev))
6448 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
6452 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
6453 unsigned long event,
6454 struct netlink_ext_ack *extack)
6456 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6457 struct mlxsw_sp_rif_params params = {
6460 struct mlxsw_sp_rif *rif;
6464 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6466 return PTR_ERR(rif);
6469 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6470 mlxsw_sp_rif_destroy(rif);
6477 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6478 unsigned long event,
6479 struct netlink_ext_ack *extack)
6481 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6482 u16 vid = vlan_dev_vlan_id(vlan_dev);
6484 if (netif_is_bridge_port(vlan_dev))
6487 if (mlxsw_sp_port_dev_check(real_dev))
6488 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6489 event, vid, extack);
6490 else if (netif_is_lag_master(real_dev))
6491 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6493 else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6494 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
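/* VRRP virtual MAC addresses use the fixed prefixes 00:00:5e:00:01 (IPv4)
 * and 00:00:5e:00:02 (IPv6), with the last octet carrying the virtual
 * router ID. For example, 00:00:5e:00:01:05 is the IPv4 virtual MAC of
 * VRID 5; that last octet is what gets programmed into the RIF.
 */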
6499 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6501 u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6502 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6504 return ether_addr_equal_masked(mac, vrrp4, mask);
6507 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6509 u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6510 u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6512 return ether_addr_equal_masked(mac, vrrp6, mask);
6515 static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6516 const u8 *mac, bool adding)
6518 char ritr_pl[MLXSW_REG_RITR_LEN];
6519 u8 vrrp_id = adding ? mac[5] : 0;
6522 if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6523 !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6526 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6527 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6531 if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6532 mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6534 mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6536 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
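/* A macvlan does not get a RIF of its own. Instead, its MAC address is
 * added as an FDB entry on the FID of the underlying RIF so that packets
 * destined to it are routed, and, if the MAC is a VRRP virtual MAC, the
 * VRID is also programmed on that RIF.
 */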
6539 static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6540 const struct net_device *macvlan_dev,
6541 struct netlink_ext_ack *extack)
6543 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6544 struct mlxsw_sp_rif *rif;
6547 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6549 NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6553 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6554 mlxsw_sp_fid_index(rif->fid), true);
6558 err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6559 macvlan_dev->dev_addr, true);
6561 goto err_rif_vrrp_add;
6563 /* Make sure the bridge driver does not have this MAC pointing at some other port. */
6566 if (rif->ops->fdb_del)
6567 rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6572 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6573 mlxsw_sp_fid_index(rif->fid), false);
6577 void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6578 const struct net_device *macvlan_dev)
6580 struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6581 struct mlxsw_sp_rif *rif;
6583 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6584 /* If we do not have a RIF, then we already took care of
6585 * removing the macvlan's MAC during RIF deletion.
6589 mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6591 mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6592 mlxsw_sp_fid_index(rif->fid), false);
6595 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6596 unsigned long event,
6597 struct netlink_ext_ack *extack)
6599 struct mlxsw_sp *mlxsw_sp;
6601 mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6607 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6609 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6616 static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
6617 unsigned long event,
6618 struct netlink_ext_ack *extack)
6620 if (mlxsw_sp_port_dev_check(dev))
6621 return mlxsw_sp_inetaddr_port_event(dev, event, extack);
6622 else if (netif_is_lag_master(dev))
6623 return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
6624 else if (netif_is_bridge_master(dev))
6625 return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
6626 else if (is_vlan_dev(dev))
6627 return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
6628 else if (netif_is_macvlan(dev))
6629 return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);
6634 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
6635 unsigned long event, void *ptr)
6637 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6638 struct net_device *dev = ifa->ifa_dev->dev;
6639 struct mlxsw_sp *mlxsw_sp;
6640 struct mlxsw_sp_rif *rif;
6643 /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6644 if (event == NETDEV_UP)
6647 mlxsw_sp = mlxsw_sp_lower_get(dev);
6651 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6652 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6655 err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
6657 return notifier_from_errno(err);
6660 int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6661 unsigned long event, void *ptr)
6663 struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6664 struct net_device *dev = ivi->ivi_dev->dev;
6665 struct mlxsw_sp *mlxsw_sp;
6666 struct mlxsw_sp_rif *rif;
6669 mlxsw_sp = mlxsw_sp_lower_get(dev);
6673 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6674 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6677 err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
6679 return notifier_from_errno(err);
6682 struct mlxsw_sp_inet6addr_event_work {
6683 struct work_struct work;
6684 struct net_device *dev;
6685 unsigned long event;
6688 static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6690 struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6691 container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6692 struct net_device *dev = inet6addr_work->dev;
6693 unsigned long event = inet6addr_work->event;
6694 struct mlxsw_sp *mlxsw_sp;
6695 struct mlxsw_sp_rif *rif;
6698 mlxsw_sp = mlxsw_sp_lower_get(dev);
6702 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6703 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6706 __mlxsw_sp_inetaddr_event(dev, event, NULL);
6710 kfree(inet6addr_work);
6713 /* Called with rcu_read_lock() */
6714 int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
6715 unsigned long event, void *ptr)
6717 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
6718 struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
6719 struct net_device *dev = if6->idev->dev;
6721 /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
6722 if (event == NETDEV_UP)
6725 if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
6728 inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
6729 if (!inet6addr_work)
6732 INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
6733 inet6addr_work->dev = dev;
6734 inet6addr_work->event = event;
6736 mlxsw_core_schedule_work(&inet6addr_work->work);
6741 int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
6742 unsigned long event, void *ptr)
6744 struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
6745 struct net_device *dev = i6vi->i6vi_dev->dev;
6746 struct mlxsw_sp *mlxsw_sp;
6747 struct mlxsw_sp_rif *rif;
6750 mlxsw_sp = mlxsw_sp_lower_get(dev);
6754 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6755 if (!mlxsw_sp_rif_should_config(rif, dev, event))
6758 err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
6760 return notifier_from_errno(err);
6763 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6764 const char *mac, int mtu)
6766 char ritr_pl[MLXSW_REG_RITR_LEN];
6769 mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6770 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6774 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
6775 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
6776 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
6777 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
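/* Handle MAC address and MTU changes on a netdev that already has a RIF:
 * remove the FDB entry of the old MAC, edit the RIF with the new MAC and
 * MTU, insert an FDB entry for the new MAC and, if the MTU changed,
 * update the multicast routing tables as well.
 */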
6780 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
6782 struct mlxsw_sp *mlxsw_sp;
6783 struct mlxsw_sp_rif *rif;
6787 mlxsw_sp = mlxsw_sp_lower_get(dev);
6791 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6794 fid_index = mlxsw_sp_fid_index(rif->fid);
6796 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
6800 err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
6805 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
6807 goto err_rif_fdb_op;
6809 if (rif->mtu != dev->mtu) {
6810 struct mlxsw_sp_vr *vr;
6813 /* The RIF is relevant only to its mr_table instance, as unlike
6814 * unicast routing, in multicast routing a RIF cannot be shared
6815 * between several multicast routing tables.
6817 vr = &mlxsw_sp->router->vrs[rif->vr_id];
6818 for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6819 mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
6823 ether_addr_copy(rif->addr, dev->dev_addr);
6824 rif->mtu = dev->mtu;
6826 netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
6831 mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
6833 mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
6837 static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
6838 struct net_device *l3_dev,
6839 struct netlink_ext_ack *extack)
6841 struct mlxsw_sp_rif *rif;
6843 /* If netdev is already associated with a RIF, then we need to
6844 * destroy it and create a new one with the new virtual router ID.
6846 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6848 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
6850 return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
6853 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6854 struct net_device *l3_dev)
6856 struct mlxsw_sp_rif *rif;
6858 rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6861 __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6864 int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
6865 struct netdev_notifier_changeupper_info *info)
6867 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
6870 /* We do not create a RIF for a macvlan, but only use it to
6871 * direct more MAC addresses to the router.
6873 if (!mlxsw_sp || netif_is_macvlan(l3_dev))
6877 case NETDEV_PRECHANGEUPPER:
6879 case NETDEV_CHANGEUPPER:
6880 if (info->linking) {
6881 struct netlink_ext_ack *extack;
6883 extack = netdev_notifier_info_to_extack(&info->info);
6884 err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
6886 mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
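/* When a RIF is destroyed, remove the FDB entries that were installed for
 * macvlan uppers so their MAC addresses no longer point at the router.
 */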
6894 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6896 struct mlxsw_sp_rif *rif = data;
6898 if (!netif_is_macvlan(dev))
6901 return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6902 mlxsw_sp_fid_index(rif->fid), false);
6905 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
6907 if (!netif_is_macvlan_port(rif->dev))
6910 netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
6911 return netdev_walk_all_upper_dev_rcu(rif->dev,
6912 __mlxsw_sp_rif_macvlan_flush, rif);
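/* Sub-port RIFs: router interfaces on top of a physical port or LAG,
 * optionally with a VLAN, programmed via the RITR sub-port interface type.
 */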
6915 static struct mlxsw_sp_rif_subport *
6916 mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6918 return container_of(rif, struct mlxsw_sp_rif_subport, common);
6921 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6922 const struct mlxsw_sp_rif_params *params)
6924 struct mlxsw_sp_rif_subport *rif_subport;
6926 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6927 rif_subport->vid = params->vid;
6928 rif_subport->lag = params->lag;
6930 rif_subport->lag_id = params->lag_id;
6932 rif_subport->system_port = params->system_port;
6935 static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
6937 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6938 struct mlxsw_sp_rif_subport *rif_subport;
6939 char ritr_pl[MLXSW_REG_RITR_LEN];
6941 rif_subport = mlxsw_sp_rif_subport_rif(rif);
6942 mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
6943 rif->rif_index, rif->vr_id, rif->dev->mtu);
6944 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
6945 mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
6946 rif_subport->lag ? rif_subport->lag_id :
6947 rif_subport->system_port, rif_subport->vid);
6950 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6953 static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
6957 err = mlxsw_sp_rif_subport_op(rif, true);
6961 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6962 mlxsw_sp_fid_index(rif->fid), true);
6964 goto err_rif_fdb_op;
6966 mlxsw_sp_fid_rif_set(rif->fid, rif);
6970 mlxsw_sp_rif_subport_op(rif, false);
6974 static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
6976 struct mlxsw_sp_fid *fid = rif->fid;
6978 mlxsw_sp_fid_rif_set(fid, NULL);
6979 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
6980 mlxsw_sp_fid_index(fid), false);
6981 mlxsw_sp_rif_macvlan_flush(rif);
6982 mlxsw_sp_rif_subport_op(rif, false);
6985 static struct mlxsw_sp_fid *
6986 mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
6987 struct netlink_ext_ack *extack)
6989 return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
6992 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
6993 .type = MLXSW_SP_RIF_TYPE_SUBPORT,
6994 .rif_size = sizeof(struct mlxsw_sp_rif_subport),
6995 .setup = mlxsw_sp_rif_subport_setup,
6996 .configure = mlxsw_sp_rif_subport_configure,
6997 .deconfigure = mlxsw_sp_rif_subport_deconfigure,
6998 .fid_get = mlxsw_sp_rif_subport_fid_get,
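/* Shared RITR helper for VLAN and FID RIFs; the vid_fid argument is
 * interpreted as a VID or a FID index according to 'type'.
 */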
7001 static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7002 enum mlxsw_reg_ritr_if_type type,
7003 u16 vid_fid, bool enable)
7005 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7006 char ritr_pl[MLXSW_REG_RITR_LEN];
7008 mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, rif->dev->mtu);
7010 mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7011 mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7013 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
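/* The "router port" is a virtual port one past the highest switch port. It
 * is used as the flood-table member that represents the router, so that
 * broadcast and multicast traffic on a routed FID also reaches the router.
 */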
7016 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7018 return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7021 static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7023 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7024 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7027 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7031 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7032 mlxsw_sp_router_port(mlxsw_sp), true);
7034 goto err_fid_mc_flood_set;
7036 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7037 mlxsw_sp_router_port(mlxsw_sp), true);
7039 goto err_fid_bc_flood_set;
7041 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7042 mlxsw_sp_fid_index(rif->fid), true);
7044 goto err_rif_fdb_op;
7046 mlxsw_sp_fid_rif_set(rif->fid, rif);
7050 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7051 mlxsw_sp_router_port(mlxsw_sp), false);
7052 err_fid_bc_flood_set:
7053 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7054 mlxsw_sp_router_port(mlxsw_sp), false);
7055 err_fid_mc_flood_set:
7056 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7060 static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7062 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7063 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7064 struct mlxsw_sp_fid *fid = rif->fid;
7066 mlxsw_sp_fid_rif_set(fid, NULL);
7067 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7068 mlxsw_sp_fid_index(fid), false);
7069 mlxsw_sp_rif_macvlan_flush(rif);
7070 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7071 mlxsw_sp_router_port(mlxsw_sp), false);
7072 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7073 mlxsw_sp_router_port(mlxsw_sp), false);
7074 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7077 static struct mlxsw_sp_fid *
7078 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7079 struct netlink_ext_ack *extack)
7084 if (is_vlan_dev(rif->dev)) {
7085 vid = vlan_dev_vlan_id(rif->dev);
7087 err = br_vlan_get_pvid(rif->dev, &vid);
7088 if (err < 0 || !vid) {
7089 NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7090 return ERR_PTR(-EINVAL);
7094 return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7097 static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7099 u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7100 struct switchdev_notifier_fdb_info info;
7101 struct net_device *br_dev;
7102 struct net_device *dev;
7104 br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7105 dev = br_fdb_find_port(br_dev, mac, vid);
7111 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7114 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7115 .type = MLXSW_SP_RIF_TYPE_VLAN,
7116 .rif_size = sizeof(struct mlxsw_sp_rif),
7117 .configure = mlxsw_sp_rif_vlan_configure,
7118 .deconfigure = mlxsw_sp_rif_vlan_deconfigure,
7119 .fid_get = mlxsw_sp_rif_vlan_fid_get,
7120 .fdb_del = mlxsw_sp_rif_vlan_fdb_del,
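/* FID RIFs back VLAN-unaware (802.1D) bridges; the FID is derived from the
 * bridge netdev's ifindex, and configuration mirrors the VLAN RIF except
 * that a FID interface is packed into RITR.
 */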
7123 static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7125 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7126 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7129 err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, true);
7134 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7135 mlxsw_sp_router_port(mlxsw_sp), true);
7137 goto err_fid_mc_flood_set;
7139 err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7140 mlxsw_sp_router_port(mlxsw_sp), true);
7142 goto err_fid_bc_flood_set;
7144 err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7145 mlxsw_sp_fid_index(rif->fid), true);
7147 goto err_rif_fdb_op;
7149 mlxsw_sp_fid_rif_set(rif->fid, rif);
7153 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7154 mlxsw_sp_router_port(mlxsw_sp), false);
7155 err_fid_bc_flood_set:
7156 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7157 mlxsw_sp_router_port(mlxsw_sp), false);
7158 err_fid_mc_flood_set:
7159 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7163 static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7165 u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7166 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7167 struct mlxsw_sp_fid *fid = rif->fid;
7169 mlxsw_sp_fid_rif_set(fid, NULL);
7170 mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7171 mlxsw_sp_fid_index(fid), false);
7172 mlxsw_sp_rif_macvlan_flush(rif);
7173 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7174 mlxsw_sp_router_port(mlxsw_sp), false);
7175 mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7176 mlxsw_sp_router_port(mlxsw_sp), false);
7177 mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7180 static struct mlxsw_sp_fid *
7181 mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7182 struct netlink_ext_ack *extack)
7184 return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7187 static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7189 struct switchdev_notifier_fdb_info info;
7190 struct net_device *dev;
7192 dev = br_fdb_find_port(rif->dev, mac, 0);
7198 call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
7201 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7202 .type = MLXSW_SP_RIF_TYPE_FID,
7203 .rif_size = sizeof(struct mlxsw_sp_rif),
7204 .configure = mlxsw_sp_rif_fid_configure,
7205 .deconfigure = mlxsw_sp_rif_fid_deconfigure,
7206 .fid_get = mlxsw_sp_rif_fid_fid_get,
7207 .fdb_del = mlxsw_sp_rif_fid_fdb_del,
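/* IPIP loopback RIFs anchor tunnel decapsulation. Instead of a FID, the
 * loopback is bound to the virtual router of the tunnel's underlay table,
 * which is looked up on configure and released on deconfigure.
 */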
7210 static struct mlxsw_sp_rif_ipip_lb *
7211 mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7213 return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7217 mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7218 const struct mlxsw_sp_rif_params *params)
7220 struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7221 struct mlxsw_sp_rif_ipip_lb *rif_lb;
7223 params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7225 rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7226 rif_lb->lb_config = params_lb->lb_config;
7230 mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7232 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7233 u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7234 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7235 struct mlxsw_sp_vr *ul_vr;
7238 ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7240 return PTR_ERR(ul_vr);
7242 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
7244 goto err_loopback_op;
7246 lb_rif->ul_vr_id = ul_vr->id;
7251 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7255 static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7257 struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7258 struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7259 struct mlxsw_sp_vr *ul_vr;
7261 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7262 mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
7265 mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7268 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
7269 .type = MLXSW_SP_RIF_TYPE_IPIP_LB,
7270 .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb),
7271 .setup = mlxsw_sp_rif_ipip_lb_setup,
7272 .configure = mlxsw_sp_rif_ipip_lb_configure,
7273 .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure,
7276 static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
7277 [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops,
7278 [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops,
7279 [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops,
7280 [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops,
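/* The per-RIF-type operations above are selected through this array; the
 * RIF pointer array itself is sized by the MAX_RIFS device resource and
 * entries stay NULL until a RIF is created.
 */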
7283 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7285 u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7287 mlxsw_sp->router->rifs = kcalloc(max_rifs,
7288 sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
7290 if (!mlxsw_sp->router->rifs)
7293 mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7298 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7302 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7303 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7305 kfree(mlxsw_sp->router->rifs);
7309 mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7311 char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7313 mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7314 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7317 static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7319 mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7320 INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7321 return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7324 static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7326 WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7329 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7331 struct mlxsw_sp_router *router;
7333 /* Flush pending FIB notifications and then flush the device's
7334 * table before requesting another dump. The FIB notification
7335 * block is unregistered, so no need to take RTNL.
7337 mlxsw_core_flush_owq();
7338 router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7339 mlxsw_sp_router_fib_flush(router->mlxsw_sp);
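/* ECMP hash configuration (RECR2): program the device with the same header
 * fields the kernel's multipath hash policy selects: L3 only by default,
 * or the full 5-tuple when the L4 policy is enabled.
 */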
7342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
7343 static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7345 mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7348 static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7350 mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7353 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7355 bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7357 mlxsw_sp_mp_hash_header_set(recr2_pl,
7358 MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7359 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7360 mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7361 mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7364 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7365 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7366 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7367 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7370 static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7372 bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7374 mlxsw_sp_mp_hash_header_set(recr2_pl,
7375 MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7376 mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7377 mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7378 mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7379 mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7381 mlxsw_sp_mp_hash_field_set(recr2_pl,
7382 MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7384 mlxsw_sp_mp_hash_header_set(recr2_pl,
7385 MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7386 mlxsw_sp_mp_hash_field_set(recr2_pl,
7387 MLXSW_REG_RECR2_TCP_UDP_SPORT);
7388 mlxsw_sp_mp_hash_field_set(recr2_pl,
7389 MLXSW_REG_RECR2_TCP_UDP_DPORT);
7393 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7395 char recr2_pl[MLXSW_REG_RECR2_LEN];
7398 get_random_bytes(&seed, sizeof(seed));
7399 mlxsw_reg_recr2_pack(recr2_pl, seed);
7400 mlxsw_sp_mp4_hash_init(recr2_pl);
7401 mlxsw_sp_mp6_hash_init(recr2_pl);
7403 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7406 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7412 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7414 char rdpm_pl[MLXSW_REG_RDPM_LEN];
7417 MLXSW_REG_ZERO(rdpm, rdpm_pl);
7419 /* HW determines switch priority based on the DSCP bits, but the
7420 * kernel still does so based on the full ToS byte. Because the two
7421 * fields do not line up, translate each DSCP value to the ToS value
7422 * the kernel would observe, skipping the 2 least-significant ECN bits.
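/* For example, DSCP 46 (EF) is passed to rt_tos2priority() below as ToS
 * 46 << 2 = 0xb8.
 */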
7424 for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7425 mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7427 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
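/* RGCR enables routing globally, caps the number of router interfaces at
 * the MAX_RIFS resource, and mirrors the kernel's ip_fwd_update_priority
 * sysctl into the RGCR usp field.
 */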
7430 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7432 bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7433 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7437 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7439 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7441 mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7442 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7443 mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7444 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7450 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7452 char rgcr_pl[MLXSW_REG_RGCR_LEN];
7454 mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7455 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
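/* Top-level router init: bring sub-systems up in dependency order (RIFs,
 * IPIP, nexthop tables, LPM, multicast routing, virtual routers, neighbour
 * tracking), then register the netevent and FIB notifiers last, once
 * everything they may call into is ready. The error path unwinds in reverse.
 */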
7458 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7460 struct mlxsw_sp_router *router;
7463 router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7466 mlxsw_sp->router = router;
7467 router->mlxsw_sp = mlxsw_sp;
7469 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7470 err = __mlxsw_sp_router_init(mlxsw_sp);
7472 goto err_router_init;
7474 err = mlxsw_sp_rifs_init(mlxsw_sp);
7478 err = mlxsw_sp_ipips_init(mlxsw_sp);
7480 goto err_ipips_init;
7482 err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
7483 &mlxsw_sp_nexthop_ht_params);
7485 goto err_nexthop_ht_init;
7487 err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
7488 &mlxsw_sp_nexthop_group_ht_params);
7490 goto err_nexthop_group_ht_init;
7492 INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
7493 err = mlxsw_sp_lpm_init(mlxsw_sp);
7497 err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
7501 err = mlxsw_sp_vrs_init(mlxsw_sp);
7505 err = mlxsw_sp_neigh_init(mlxsw_sp);
7507 goto err_neigh_init;
7509 mlxsw_sp->router->netevent_nb.notifier_call =
7510 mlxsw_sp_router_netevent_event;
7511 err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7513 goto err_register_netevent_notifier;
7515 err = mlxsw_sp_mp_hash_init(mlxsw_sp);
7517 goto err_mp_hash_init;
7519 err = mlxsw_sp_dscp_init(mlxsw_sp);
7523 mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
7524 err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
7525 mlxsw_sp_router_fib_dump_flush);
7527 goto err_register_fib_notifier;
7531 err_register_fib_notifier:
7534 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7535 err_register_netevent_notifier:
7536 mlxsw_sp_neigh_fini(mlxsw_sp);
7538 mlxsw_sp_vrs_fini(mlxsw_sp);
7540 mlxsw_sp_mr_fini(mlxsw_sp);
7542 mlxsw_sp_lpm_fini(mlxsw_sp);
7544 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7545 err_nexthop_group_ht_init:
7546 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7547 err_nexthop_ht_init:
7548 mlxsw_sp_ipips_fini(mlxsw_sp);
7550 mlxsw_sp_rifs_fini(mlxsw_sp);
7552 __mlxsw_sp_router_fini(mlxsw_sp);
7554 kfree(mlxsw_sp->router);
7558 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7560 unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
7561 unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
7562 mlxsw_sp_neigh_fini(mlxsw_sp);
7563 mlxsw_sp_vrs_fini(mlxsw_sp);
7564 mlxsw_sp_mr_fini(mlxsw_sp);
7565 mlxsw_sp_lpm_fini(mlxsw_sp);
7566 rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
7567 rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
7568 mlxsw_sp_ipips_fini(mlxsw_sp);
7569 mlxsw_sp_rifs_fini(mlxsw_sp);
7570 __mlxsw_sp_router_fini(mlxsw_sp);
7571 kfree(mlxsw_sp->router);