GNU Linux-libre 4.19.245-gnu1
[releases.git] / drivers / net / ethernet / mellanox / mlxsw / spectrum_router.c
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4 #include <linux/kernel.h>
5 #include <linux/types.h>
6 #include <linux/rhashtable.h>
7 #include <linux/bitops.h>
8 #include <linux/in6.h>
9 #include <linux/notifier.h>
10 #include <linux/inetdevice.h>
11 #include <linux/netdevice.h>
12 #include <linux/if_bridge.h>
13 #include <linux/socket.h>
14 #include <linux/route.h>
15 #include <linux/gcd.h>
16 #include <linux/random.h>
17 #include <linux/if_macvlan.h>
18 #include <net/netevent.h>
19 #include <net/neighbour.h>
20 #include <net/arp.h>
21 #include <net/ip_fib.h>
22 #include <net/ip6_fib.h>
23 #include <net/fib_rules.h>
24 #include <net/ip_tunnels.h>
25 #include <net/l3mdev.h>
26 #include <net/addrconf.h>
27 #include <net/ndisc.h>
28 #include <net/ipv6.h>
29 #include <net/fib_notifier.h>
30 #include <net/switchdev.h>
31
32 #include "spectrum.h"
33 #include "core.h"
34 #include "reg.h"
35 #include "spectrum_cnt.h"
36 #include "spectrum_dpipe.h"
37 #include "spectrum_ipip.h"
38 #include "spectrum_mr.h"
39 #include "spectrum_mr_tcam.h"
40 #include "spectrum_router.h"
41 #include "spectrum_span.h"
42
43 struct mlxsw_sp_fib;
44 struct mlxsw_sp_vr;
45 struct mlxsw_sp_lpm_tree;
46 struct mlxsw_sp_rif_ops;
47
/* Per-ASIC router state, owned by the parent mlxsw_sp instance. */
struct mlxsw_sp_router {
	struct mlxsw_sp *mlxsw_sp;	/* Back-pointer to owning device. */
	struct mlxsw_sp_rif **rifs;	/* Router interfaces. */
	struct mlxsw_sp_vr *vrs;	/* Virtual routers. */
	struct rhashtable neigh_ht;
	struct rhashtable nexthop_group_ht;
	struct rhashtable nexthop_ht;
	struct list_head nexthop_list;
	struct {
		/* One tree for each protocol: IPv4 and IPv6 */
		struct mlxsw_sp_lpm_tree *proto_trees[2];
		/* All usable LPM trees; tree ID 0 is reserved by HW. */
		struct mlxsw_sp_lpm_tree *trees;
		unsigned int tree_count;
	} lpm;
	struct {
		/* Periodic neighbour-activity update work. */
		struct delayed_work dw;
		unsigned long interval; /* ms */
	} neighs_update;
	struct delayed_work nexthop_probe_dw;
#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
	struct list_head nexthop_neighs_list;
	struct list_head ipip_list;
	bool aborted;	/* NOTE(review): presumably set when FIB offload is abandoned — confirm against fib notifier code. */
	struct notifier_block fib_nb;
	struct notifier_block netevent_nb;
	const struct mlxsw_sp_rif_ops **rif_ops_arr;
	const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
};
76
/* Router interface (RIF): L3 representation of a netdevice on the ASIC. */
struct mlxsw_sp_rif {
	struct list_head nexthop_list;	/* Nexthops egressing via this RIF. */
	struct list_head neigh_list;	/* Neighbour entries on this RIF. */
	struct net_device *dev;		/* Backing kernel netdevice. */
	struct mlxsw_sp_fid *fid;
	unsigned char addr[ETH_ALEN];	/* RIF MAC address. */
	int mtu;
	u16 rif_index;			/* HW RIF index (RITR). */
	u16 vr_id;			/* Owning virtual router. */
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp *mlxsw_sp;

	/* Packet counter indices; only meaningful while the matching
	 * *_valid flag is set (see mlxsw_sp_rif_counter_alloc/free).
	 */
	unsigned int counter_ingress;
	bool counter_ingress_valid;
	unsigned int counter_egress;
	bool counter_egress_valid;
};
94
/* Parameters used to construct a RIF for a (LAG or system) port + VLAN. */
struct mlxsw_sp_rif_params {
	struct net_device *dev;
	union {
		u16 system_port;	/* Valid when !lag. */
		u16 lag_id;		/* Valid when lag. */
	};
	u16 vid;
	bool lag;
};

/* Sub-port RIF: the common RIF plus the port/LAG identity it hangs off. */
struct mlxsw_sp_rif_subport {
	struct mlxsw_sp_rif common;
	union {
		u16 system_port;
		u16 lag_id;
	};
	u16 vid;
	bool lag;
};

/* IP-in-IP loopback RIF. */
struct mlxsw_sp_rif_ipip_lb {
	struct mlxsw_sp_rif common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
	u16 ul_vr_id; /* Reserved for Spectrum-2. */
};

struct mlxsw_sp_rif_params_ipip_lb {
	struct mlxsw_sp_rif_params common;
	struct mlxsw_sp_rif_ipip_lb_config lb_config;
};

/* Per-RIF-type operations: construction, HW (de)configuration, FID
 * lookup and FDB cleanup.
 */
struct mlxsw_sp_rif_ops {
	enum mlxsw_sp_rif_type type;
	size_t rif_size;	/* Size of the type-specific RIF struct. */

	void (*setup)(struct mlxsw_sp_rif *rif,
		      const struct mlxsw_sp_rif_params *params);
	int (*configure)(struct mlxsw_sp_rif *rif);
	void (*deconfigure)(struct mlxsw_sp_rif *rif);
	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
					 struct netlink_ext_ack *extack);
	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
};
138
139 static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
140 static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
141                                   struct mlxsw_sp_lpm_tree *lpm_tree);
142 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
143                                      const struct mlxsw_sp_fib *fib,
144                                      u8 tree_id);
145 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
146                                        const struct mlxsw_sp_fib *fib);
147
148 static unsigned int *
149 mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
150                            enum mlxsw_sp_rif_counter_dir dir)
151 {
152         switch (dir) {
153         case MLXSW_SP_RIF_COUNTER_EGRESS:
154                 return &rif->counter_egress;
155         case MLXSW_SP_RIF_COUNTER_INGRESS:
156                 return &rif->counter_ingress;
157         }
158         return NULL;
159 }
160
161 static bool
162 mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
163                                enum mlxsw_sp_rif_counter_dir dir)
164 {
165         switch (dir) {
166         case MLXSW_SP_RIF_COUNTER_EGRESS:
167                 return rif->counter_egress_valid;
168         case MLXSW_SP_RIF_COUNTER_INGRESS:
169                 return rif->counter_ingress_valid;
170         }
171         return false;
172 }
173
174 static void
175 mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
176                                enum mlxsw_sp_rif_counter_dir dir,
177                                bool valid)
178 {
179         switch (dir) {
180         case MLXSW_SP_RIF_COUNTER_EGRESS:
181                 rif->counter_egress_valid = valid;
182                 break;
183         case MLXSW_SP_RIF_COUNTER_INGRESS:
184                 rif->counter_ingress_valid = valid;
185                 break;
186         }
187 }
188
/* Bind (@enable true) or unbind counter @counter_index to RIF
 * @rif_index in direction @dir via the RITR register. Returns 0 or a
 * register access error.
 */
static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				     unsigned int counter_index, bool enable,
				     enum mlxsw_sp_rif_counter_dir dir)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	bool is_egress = false;
	int err;

	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
		is_egress = true;
	/* Read-modify-write: query the current RITR contents first so
	 * that setting the counter does not clobber the rest of the
	 * interface configuration.
	 */
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
				    is_egress);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
208
/* Read the RIF counter value for direction @dir into @cnt.
 *
 * Returns -EINVAL if no valid counter is bound in that direction, a
 * register error on query failure, 0 otherwise.
 */
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_rif *rif,
				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];
	unsigned int *p_counter_index;
	bool valid;
	int err;

	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
	if (!valid)
		return -EINVAL;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	/* NOP opcode: read the counter without clearing it. */
	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
			     MLXSW_REG_RICNT_OPCODE_NOP);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
	if (err)
		return err;
	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
	return 0;
}
233
/* Reset counter @counter_index to zero using the RICNT clear opcode. */
static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
				      unsigned int counter_index)
{
	char ricnt_pl[MLXSW_REG_RICNT_LEN];

	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
			     MLXSW_REG_RICNT_OPCODE_CLEAR);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
}
243
/* Allocate a counter from the RIF sub-pool for direction @dir, clear
 * it, bind it to the RIF in hardware and mark it valid. On any failure
 * the counter is returned to the pool and an error is returned.
 */
int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;
	int err;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (!p_counter_index)
		return -EINVAL;
	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
				     p_counter_index);
	if (err)
		return err;

	/* Clear any stale value before enabling the binding. */
	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
	if (err)
		goto err_counter_clear;

	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
					*p_counter_index, true, dir);
	if (err)
		goto err_counter_edit;
	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
	return 0;

err_counter_edit:
err_counter_clear:
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	return err;
}
276
/* Undo mlxsw_sp_rif_counter_alloc(): unbind the counter in hardware,
 * return it to the pool and mark it invalid. No-op when no valid
 * counter is bound in direction @dir.
 */
void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_rif *rif,
			       enum mlxsw_sp_rif_counter_dir dir)
{
	unsigned int *p_counter_index;

	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
		return;

	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
	if (WARN_ON(!p_counter_index))
		return;
	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
				  *p_counter_index, false, dir);
	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
			      *p_counter_index);
	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
}
295
296 static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
297 {
298         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
299         struct devlink *devlink;
300
301         devlink = priv_to_devlink(mlxsw_sp->core);
302         if (!devlink_dpipe_table_counter_enabled(devlink,
303                                                  MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
304                 return;
305         mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
306 }
307
308 static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
309 {
310         struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
311
312         mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
313 }
314
/* Number of distinct prefix lengths: 0..128 (IPv6-sized, also covers IPv4). */
#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)

/* Bitmap with one bit per prefix length in use. */
struct mlxsw_sp_prefix_usage {
	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
};

/* Iterate @prefix over every prefix length set in @prefix_usage,
 * in ascending order.
 */
#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
323
324 static bool
325 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
326                          struct mlxsw_sp_prefix_usage *prefix_usage2)
327 {
328         return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
329 }
330
331 static void
332 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
333                           struct mlxsw_sp_prefix_usage *prefix_usage2)
334 {
335         memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
336 }
337
/* Mark prefix length @prefix_len as used in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
			  unsigned char prefix_len)
{
	set_bit(prefix_len, prefix_usage->b);
}
344
/* Mark prefix length @prefix_len as unused in @prefix_usage. */
static void
mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
			    unsigned char prefix_len)
{
	clear_bit(prefix_len, prefix_usage->b);
}
351
/* LPM lookup key: address buffer sized for IPv6 (IPv4 fits too) plus
 * prefix length.
 */
struct mlxsw_sp_fib_key {
	unsigned char addr[sizeof(struct in6_addr)];
	unsigned char prefix_len;
};

enum mlxsw_sp_fib_entry_type {
	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,

	/* This is a special case of local delivery, where a packet should be
	 * decapsulated on reception. Note that there is no corresponding ENCAP,
	 * because that's a type of next hop, not of FIB entry. (There can be
	 * several next hops in a REMOTE entry, and some of them may be
	 * encapsulating entries.)
	 */
	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
};
370
371 struct mlxsw_sp_nexthop_group;
372
/* A node in a FIB: one prefix (key), holding a list of entries. */
struct mlxsw_sp_fib_node {
	struct list_head entry_list;	/* mlxsw_sp_fib_entry::list. */
	struct list_head list;		/* Member of mlxsw_sp_fib::node_list. */
	struct rhash_head ht_node;	/* Member of mlxsw_sp_fib::ht. */
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_fib_key key;
};

/* Decap state for IPIP_DECAP entries. */
struct mlxsw_sp_fib_entry_decap {
	struct mlxsw_sp_ipip_entry *ipip_entry;
	u32 tunnel_index;
};

struct mlxsw_sp_fib_entry {
	struct list_head list;		/* Member of fib_node::entry_list. */
	struct mlxsw_sp_fib_node *fib_node;
	enum mlxsw_sp_fib_entry_type type;
	struct list_head nexthop_group_node;
	struct mlxsw_sp_nexthop_group *nh_group;
	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
};

/* IPv4 specialization: carries the kernel route attributes needed to
 * match a notification to the offloaded entry.
 */
struct mlxsw_sp_fib4_entry {
	struct mlxsw_sp_fib_entry common;
	u32 tb_id;
	u32 prio;
	u8 tos;
	u8 type;
};

/* IPv6 specialization: one entry aggregates several fib6_info routes. */
struct mlxsw_sp_fib6_entry {
	struct mlxsw_sp_fib_entry common;
	struct list_head rt6_list;	/* mlxsw_sp_rt6::list. */
	unsigned int nrt6;		/* Number of routes on rt6_list. */
};

struct mlxsw_sp_rt6 {
	struct list_head list;
	struct fib6_info *rt;
};

struct mlxsw_sp_lpm_tree {
	u8 id; /* tree ID */
	unsigned int ref_count;
	enum mlxsw_sp_l3proto proto;
	/* Per-prefix-length usage counts backing prefix_usage. */
	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
	struct mlxsw_sp_prefix_usage prefix_usage;
};

/* One FIB per (VR, protocol) pair. */
struct mlxsw_sp_fib {
	struct rhashtable ht;		/* Nodes keyed by mlxsw_sp_fib_key. */
	struct list_head node_list;
	struct mlxsw_sp_vr *vr;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	enum mlxsw_sp_l3proto proto;
};

/* Virtual router: a VR is "used" when any of its tables exists
 * (see mlxsw_sp_vr_is_used()).
 */
struct mlxsw_sp_vr {
	u16 id; /* virtual router ID */
	u32 tb_id; /* kernel fib table id */
	unsigned int rif_count;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
};
438
439 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
440
/* Create a FIB for @proto within @vr: initialize the node hash table,
 * take a reference on the protocol's current LPM tree and bind the VR
 * to it. Returns the new FIB or an ERR_PTR.
 */
static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
						struct mlxsw_sp_vr *vr,
						enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	struct mlxsw_sp_fib *fib;
	int err;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
	if (!fib)
		return ERR_PTR(-ENOMEM);
	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
	if (err)
		goto err_rhashtable_init;
	INIT_LIST_HEAD(&fib->node_list);
	fib->proto = proto;
	fib->vr = vr;
	fib->lpm_tree = lpm_tree;
	mlxsw_sp_lpm_tree_hold(lpm_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
	if (err)
		goto err_lpm_tree_bind;
	return fib;

err_lpm_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
	kfree(fib);
	return ERR_PTR(err);
}
472
/* Tear down a FIB created by mlxsw_sp_fib_create(). The node list is
 * expected to be empty by now.
 */
static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib *fib)
{
	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
	WARN_ON(!list_empty(&fib->node_list));
	rhashtable_destroy(&fib->ht);
	kfree(fib);
}
482
483 static struct mlxsw_sp_lpm_tree *
484 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
485 {
486         static struct mlxsw_sp_lpm_tree *lpm_tree;
487         int i;
488
489         for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
490                 lpm_tree = &mlxsw_sp->router->lpm.trees[i];
491                 if (lpm_tree->ref_count == 0)
492                         return lpm_tree;
493         }
494         return NULL;
495 }
496
/* Allocate LPM tree @lpm_tree->id in hardware (RALTA, allocate=true). */
static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, true,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
507
/* Free LPM tree @lpm_tree->id in hardware (RALTA, allocate=false).
 * The write status is intentionally ignored on this teardown path.
 */
static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];

	mlxsw_reg_ralta_pack(ralta_pl, false,
			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
			     lpm_tree->id);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
}
518
/* Program the left-chained structure of @lpm_tree (RALST): the largest
 * used prefix length becomes the root bin, and each other used length
 * (except 0) is chained to the previously packed one.
 */
static int
mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_prefix_usage *prefix_usage,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	char ralst_pl[MLXSW_REG_RALST_LEN];
	u8 root_bin = 0;
	u8 prefix;
	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;

	/* Iteration is in ascending order, so root_bin ends up as the
	 * largest used prefix length.
	 */
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
		root_bin = prefix;

	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
		if (prefix == 0)
			continue;
		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
					 MLXSW_REG_RALST_BIN_NO_CHILD);
		last_prefix = prefix;
	}
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
}
542
/* Claim an unused LPM tree slot, allocate it in hardware and program
 * its structure from @prefix_usage. Returns the tree (ref_count 1) or
 * an ERR_PTR (-EBUSY when all trees are in use).
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
			 struct mlxsw_sp_prefix_usage *prefix_usage,
			 enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
	if (!lpm_tree)
		return ERR_PTR(-EBUSY);
	lpm_tree->proto = proto;
	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
	if (err)
		return ERR_PTR(err);

	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
						lpm_tree);
	if (err)
		goto err_left_struct_set;
	memcpy(&lpm_tree->prefix_usage, prefix_usage,
	       sizeof(lpm_tree->prefix_usage));
	/* Fresh tree: no per-prefix references yet. */
	memset(&lpm_tree->prefix_ref_count, 0,
	       sizeof(lpm_tree->prefix_ref_count));
	lpm_tree->ref_count = 1;
	return lpm_tree;

err_left_struct_set:
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
	return ERR_PTR(err);
}
574
/* Destroy a tree whose reference count dropped to zero; currently this
 * only releases the hardware allocation.
 */
static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_lpm_tree *lpm_tree)
{
	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
}
580
/* Get a tree matching @proto and @prefix_usage: reuse an existing one
 * (taking a reference) or create a new one. Returns the tree or an
 * ERR_PTR from creation.
 */
static struct mlxsw_sp_lpm_tree *
mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
		      struct mlxsw_sp_prefix_usage *prefix_usage,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int i;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		if (lpm_tree->ref_count != 0 &&
		    lpm_tree->proto == proto &&
		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
					     prefix_usage)) {
			mlxsw_sp_lpm_tree_hold(lpm_tree);
			return lpm_tree;
		}
	}
	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}
601
/* Take a reference on @lpm_tree. Paired with mlxsw_sp_lpm_tree_put(). */
static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
{
	lpm_tree->ref_count++;
}
606
/* Drop a reference on @lpm_tree; destroy it when the last one goes. */
static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_lpm_tree *lpm_tree)
{
	if (--lpm_tree->ref_count == 0)
		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
}
613
614 #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
615
/* Initialize LPM tree management: size the tree array from device
 * resources (tree 0 is reserved, hence MLXSW_SP_LPM_TREE_MIN) and
 * create one empty-usage tree per protocol as the initial default.
 */
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
	struct mlxsw_sp_lpm_tree *lpm_tree;
	u64 max_trees;
	int err, i;

	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
		return -EIO;

	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
					     sizeof(struct mlxsw_sp_lpm_tree),
					     GFP_KERNEL);
	if (!mlxsw_sp->router->lpm.trees)
		return -ENOMEM;

	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
	}

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv4_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;

	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(lpm_tree)) {
		err = PTR_ERR(lpm_tree);
		goto err_ipv6_tree_get;
	}
	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;

	return 0;

err_ipv6_tree_get:
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_ipv4_tree_get:
	kfree(mlxsw_sp->router->lpm.trees);
	return err;
}
664
/* Undo mlxsw_sp_lpm_init(): drop the per-protocol default trees and
 * free the tree array.
 */
static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_lpm_tree *lpm_tree;

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);

	kfree(mlxsw_sp->router->lpm.trees);
}
677
678 static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
679 {
680         return !!vr->fib4 || !!vr->fib6 ||
681                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
682                !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
683 }
684
/* Find a virtual router slot that is not in use, or NULL if all
 * MAX_VRS slots are taken.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_vr *vr;
	int i;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (!mlxsw_sp_vr_is_used(vr))
			return vr;
	}
	return NULL;
}
697
/* Bind the FIB's virtual router to LPM tree @tree_id for its protocol
 * (RALTB register).
 */
static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
				     const struct mlxsw_sp_fib *fib, u8 tree_id)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     tree_id);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
708
/* Unbind the FIB's virtual router from its LPM tree by rebinding it to
 * the reserved default tree 0.
 */
static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
				       const struct mlxsw_sp_fib *fib)
{
	char raltb_pl[MLXSW_REG_RALTB_LEN];

	/* Bind to tree 0 which is default */
	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
}
719
720 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
721 {
722         /* For our purpose, squash main, default and local tables into one */
723         if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
724                 tb_id = RT_TABLE_MAIN;
725         return tb_id;
726 }
727
/* Find the in-use virtual router bound to kernel table @tb_id (after
 * squashing local/default into main), or NULL if none exists.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
					    u32 tb_id)
{
	struct mlxsw_sp_vr *vr;
	int i;

	tb_id = mlxsw_sp_fix_tb_id(tb_id);

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		vr = &mlxsw_sp->router->vrs[i];
		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
			return vr;
	}
	return NULL;
}
743
744 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
745                                             enum mlxsw_sp_l3proto proto)
746 {
747         switch (proto) {
748         case MLXSW_SP_L3_PROTO_IPV4:
749                 return vr->fib4;
750         case MLXSW_SP_L3_PROTO_IPV6:
751                 return vr->fib6;
752         }
753         return NULL;
754 }
755
/* Claim an unused VR slot and populate it: IPv4 FIB, IPv6 FIB and both
 * multicast routing tables. On any failure, previously created tables
 * are destroyed in reverse order and an ERR_PTR is returned. Note the
 * VR's fields are only assigned once everything succeeded, so a failed
 * attempt leaves the slot unused.
 */
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
					      u32 tb_id,
					      struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
	struct mlxsw_sp_fib *fib4;
	struct mlxsw_sp_fib *fib6;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
	if (!vr) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
		return ERR_PTR(-EBUSY);
	}
	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib4))
		return ERR_CAST(fib4);
	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib6)) {
		err = PTR_ERR(fib6);
		goto err_fib6_create;
	}
	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(mr4_table)) {
		err = PTR_ERR(mr4_table);
		goto err_mr4_table_create;
	}
	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
					     MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(mr6_table)) {
		err = PTR_ERR(mr6_table);
		goto err_mr6_table_create;
	}

	vr->fib4 = fib4;
	vr->fib6 = fib6;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
	vr->tb_id = tb_id;
	return vr;

err_mr6_table_create:
	mlxsw_sp_mr_table_destroy(mr4_table);
err_mr4_table_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
err_fib6_create:
	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
	return ERR_PTR(err);
}
807
/* Destroy all of a VR's tables in reverse creation order, NULLing each
 * pointer so the slot reads as unused again.
 */
static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_vr *vr)
{
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
	vr->fib6 = NULL;
	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
	vr->fib4 = NULL;
}
820
821 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
822                                            struct netlink_ext_ack *extack)
823 {
824         struct mlxsw_sp_vr *vr;
825
826         tb_id = mlxsw_sp_fix_tb_id(tb_id);
827         vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
828         if (!vr)
829                 vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
830         return vr;
831 }
832
/* Release a VR: destroy it only once it has no RIFs, no FIB nodes and
 * no multicast routes left.
 */
static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
	    list_empty(&vr->fib6->node_list) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
841
842 static bool
843 mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
844                                     enum mlxsw_sp_l3proto proto, u8 tree_id)
845 {
846         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
847
848         if (!mlxsw_sp_vr_is_used(vr))
849                 return false;
850         if (fib->lpm_tree->id == tree_id)
851                 return true;
852         return false;
853 }
854
/* Switch one FIB from its current LPM tree to @new_tree: take a
 * reference on the new tree and rebind the VR. On bind failure the old
 * tree is restored and the new reference dropped.
 */
static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib *fib,
					struct mlxsw_sp_lpm_tree *new_tree)
{
	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
	int err;

	fib->lpm_tree = new_tree;
	mlxsw_sp_lpm_tree_hold(new_tree);
	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
	if (err)
		goto err_tree_bind;
	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
	return 0;

err_tree_bind:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
	fib->lpm_tree = old_tree;
	return err;
}
875
876 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
877                                          struct mlxsw_sp_fib *fib,
878                                          struct mlxsw_sp_lpm_tree *new_tree)
879 {
880         enum mlxsw_sp_l3proto proto = fib->proto;
881         struct mlxsw_sp_lpm_tree *old_tree;
882         u8 old_id, new_id = new_tree->id;
883         struct mlxsw_sp_vr *vr;
884         int i, err;
885
886         old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
887         old_id = old_tree->id;
888
889         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
890                 vr = &mlxsw_sp->router->vrs[i];
891                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
892                         continue;
893                 err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
894                                                    mlxsw_sp_vr_fib(vr, proto),
895                                                    new_tree);
896                 if (err)
897                         goto err_tree_replace;
898         }
899
900         memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
901                sizeof(new_tree->prefix_ref_count));
902         mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
903         mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
904
905         return 0;
906
907 err_tree_replace:
908         for (i--; i >= 0; i--) {
909                 if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
910                         continue;
911                 mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
912                                              mlxsw_sp_vr_fib(vr, proto),
913                                              old_tree);
914         }
915         return err;
916 }
917
918 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
919 {
920         struct mlxsw_sp_vr *vr;
921         u64 max_vrs;
922         int i;
923
924         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
925                 return -EIO;
926
927         max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
928         mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
929                                         GFP_KERNEL);
930         if (!mlxsw_sp->router->vrs)
931                 return -ENOMEM;
932
933         for (i = 0; i < max_vrs; i++) {
934                 vr = &mlxsw_sp->router->vrs[i];
935                 vr->id = i;
936         }
937
938         return 0;
939 }
940
941 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
942
static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
{
	/* At this stage we're guaranteed not to have new incoming
	 * FIB notifications and the work queue is free from FIBs
	 * sitting on top of mlxsw netdevs. However, we can still
	 * have other FIBs queued. Flush the queue before flushing
	 * the device's tables. No need for locks, as we're the only
	 * writer.
	 */
	mlxsw_core_flush_owq();
	mlxsw_sp_router_fib_flush(mlxsw_sp);
	/* Free the VR array allocated in mlxsw_sp_vrs_init(). */
	kfree(mlxsw_sp->router->vrs);
}
956
957 static struct net_device *
958 __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
959 {
960         struct ip_tunnel *tun = netdev_priv(ol_dev);
961         struct net *net = dev_net(ol_dev);
962
963         return __dev_get_by_index(net, tun->parms.link);
964 }
965
966 u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
967 {
968         struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
969
970         if (d)
971                 return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
972         else
973                 return RT_TABLE_MAIN;
974 }
975
976 static struct mlxsw_sp_rif *
977 mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
978                     const struct mlxsw_sp_rif_params *params,
979                     struct netlink_ext_ack *extack);
980
981 static struct mlxsw_sp_rif_ipip_lb *
982 mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
983                                 enum mlxsw_sp_ipip_type ipipt,
984                                 struct net_device *ol_dev,
985                                 struct netlink_ext_ack *extack)
986 {
987         struct mlxsw_sp_rif_params_ipip_lb lb_params;
988         const struct mlxsw_sp_ipip_ops *ipip_ops;
989         struct mlxsw_sp_rif *rif;
990
991         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
992         lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
993                 .common.dev = ol_dev,
994                 .common.lag = false,
995                 .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
996         };
997
998         rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
999         if (IS_ERR(rif))
1000                 return ERR_CAST(rif);
1001         return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1002 }
1003
1004 static struct mlxsw_sp_ipip_entry *
1005 mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1006                           enum mlxsw_sp_ipip_type ipipt,
1007                           struct net_device *ol_dev)
1008 {
1009         const struct mlxsw_sp_ipip_ops *ipip_ops;
1010         struct mlxsw_sp_ipip_entry *ipip_entry;
1011         struct mlxsw_sp_ipip_entry *ret = NULL;
1012
1013         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1014         ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1015         if (!ipip_entry)
1016                 return ERR_PTR(-ENOMEM);
1017
1018         ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1019                                                             ol_dev, NULL);
1020         if (IS_ERR(ipip_entry->ol_lb)) {
1021                 ret = ERR_CAST(ipip_entry->ol_lb);
1022                 goto err_ol_ipip_lb_create;
1023         }
1024
1025         ipip_entry->ipipt = ipipt;
1026         ipip_entry->ol_dev = ol_dev;
1027
1028         switch (ipip_ops->ul_proto) {
1029         case MLXSW_SP_L3_PROTO_IPV4:
1030                 ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1031                 break;
1032         case MLXSW_SP_L3_PROTO_IPV6:
1033                 WARN_ON(1);
1034                 break;
1035         }
1036
1037         return ipip_entry;
1038
1039 err_ol_ipip_lb_create:
1040         kfree(ipip_entry);
1041         return ret;
1042 }
1043
static void
mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
	/* Tear down the loopback RIF created in mlxsw_sp_ipip_entry_alloc()
	 * before freeing the entry itself.
	 */
	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
	kfree(ipip_entry);
}
1050
1051 static bool
1052 mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1053                                   const enum mlxsw_sp_l3proto ul_proto,
1054                                   union mlxsw_sp_l3addr saddr,
1055                                   u32 ul_tb_id,
1056                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1057 {
1058         u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1059         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1060         union mlxsw_sp_l3addr tun_saddr;
1061
1062         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1063                 return false;
1064
1065         tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1066         return tun_ul_tb_id == ul_tb_id &&
1067                mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1068 }
1069
1070 static int
1071 mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1072                               struct mlxsw_sp_fib_entry *fib_entry,
1073                               struct mlxsw_sp_ipip_entry *ipip_entry)
1074 {
1075         u32 tunnel_index;
1076         int err;
1077
1078         err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1079                                   1, &tunnel_index);
1080         if (err)
1081                 return err;
1082
1083         ipip_entry->decap_fib_entry = fib_entry;
1084         fib_entry->decap.ipip_entry = ipip_entry;
1085         fib_entry->decap.tunnel_index = tunnel_index;
1086         return 0;
1087 }
1088
1089 static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1090                                           struct mlxsw_sp_fib_entry *fib_entry)
1091 {
1092         /* Unlink this node from the IPIP entry that it's the decap entry of. */
1093         fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1094         fib_entry->decap.ipip_entry = NULL;
1095         mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1096                            1, fib_entry->decap.tunnel_index);
1097 }
1098
1099 static struct mlxsw_sp_fib_node *
1100 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1101                          size_t addr_len, unsigned char prefix_len);
1102 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1103                                      struct mlxsw_sp_fib_entry *fib_entry);
1104
static void
mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;

	/* Detach the route from the tunnel and downgrade it from a decap
	 * entry back to a plain trap entry, then push the change to HW.
	 */
	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;

	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
1116
1117 static void
1118 mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1119                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1120                                   struct mlxsw_sp_fib_entry *decap_fib_entry)
1121 {
1122         if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1123                                           ipip_entry))
1124                 return;
1125         decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1126
1127         if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1128                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1129 }
1130
1131 /* Given an IPIP entry, find the corresponding decap route. */
1132 static struct mlxsw_sp_fib_entry *
1133 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1134                                struct mlxsw_sp_ipip_entry *ipip_entry)
1135 {
1136         static struct mlxsw_sp_fib_node *fib_node;
1137         const struct mlxsw_sp_ipip_ops *ipip_ops;
1138         struct mlxsw_sp_fib_entry *fib_entry;
1139         unsigned char saddr_prefix_len;
1140         union mlxsw_sp_l3addr saddr;
1141         struct mlxsw_sp_fib *ul_fib;
1142         struct mlxsw_sp_vr *ul_vr;
1143         const void *saddrp;
1144         size_t saddr_len;
1145         u32 ul_tb_id;
1146         u32 saddr4;
1147
1148         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1149
1150         ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1151         ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1152         if (!ul_vr)
1153                 return NULL;
1154
1155         ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1156         saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1157                                            ipip_entry->ol_dev);
1158
1159         switch (ipip_ops->ul_proto) {
1160         case MLXSW_SP_L3_PROTO_IPV4:
1161                 saddr4 = be32_to_cpu(saddr.addr4);
1162                 saddrp = &saddr4;
1163                 saddr_len = 4;
1164                 saddr_prefix_len = 32;
1165                 break;
1166         case MLXSW_SP_L3_PROTO_IPV6:
1167                 WARN_ON(1);
1168                 return NULL;
1169         }
1170
1171         fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1172                                             saddr_prefix_len);
1173         if (!fib_node || list_empty(&fib_node->entry_list))
1174                 return NULL;
1175
1176         fib_entry = list_first_entry(&fib_node->entry_list,
1177                                      struct mlxsw_sp_fib_entry, list);
1178         if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1179                 return NULL;
1180
1181         return fib_entry;
1182 }
1183
1184 static struct mlxsw_sp_ipip_entry *
1185 mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1186                            enum mlxsw_sp_ipip_type ipipt,
1187                            struct net_device *ol_dev)
1188 {
1189         struct mlxsw_sp_ipip_entry *ipip_entry;
1190
1191         ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1192         if (IS_ERR(ipip_entry))
1193                 return ipip_entry;
1194
1195         list_add_tail(&ipip_entry->ipip_list_node,
1196                       &mlxsw_sp->router->ipip_list);
1197
1198         return ipip_entry;
1199 }
1200
static void
mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_ipip_entry *ipip_entry)
{
	/* Unlink the entry from the router's IPIP list before freeing it. */
	list_del(&ipip_entry->ipip_list_node);
	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
1208
1209 static bool
1210 mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1211                                   const struct net_device *ul_dev,
1212                                   enum mlxsw_sp_l3proto ul_proto,
1213                                   union mlxsw_sp_l3addr ul_dip,
1214                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1215 {
1216         u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1217         enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1218
1219         if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1220                 return false;
1221
1222         return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1223                                                  ul_tb_id, ipip_entry);
1224 }
1225
1226 /* Given decap parameters, find the corresponding IPIP entry. */
1227 static struct mlxsw_sp_ipip_entry *
1228 mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1229                                   const struct net_device *ul_dev,
1230                                   enum mlxsw_sp_l3proto ul_proto,
1231                                   union mlxsw_sp_l3addr ul_dip)
1232 {
1233         struct mlxsw_sp_ipip_entry *ipip_entry;
1234
1235         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1236                             ipip_list_node)
1237                 if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1238                                                       ul_proto, ul_dip,
1239                                                       ipip_entry))
1240                         return ipip_entry;
1241
1242         return NULL;
1243 }
1244
1245 static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1246                                       const struct net_device *dev,
1247                                       enum mlxsw_sp_ipip_type *p_type)
1248 {
1249         struct mlxsw_sp_router *router = mlxsw_sp->router;
1250         const struct mlxsw_sp_ipip_ops *ipip_ops;
1251         enum mlxsw_sp_ipip_type ipipt;
1252
1253         for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1254                 ipip_ops = router->ipip_ops_arr[ipipt];
1255                 if (dev->type == ipip_ops->dev_type) {
1256                         if (p_type)
1257                                 *p_type = ipipt;
1258                         return true;
1259                 }
1260         }
1261         return false;
1262 }
1263
/* Is @dev a tunnel overlay netdev of a type this driver recognizes? */
bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
				const struct net_device *dev)
{
	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
}
1269
1270 static struct mlxsw_sp_ipip_entry *
1271 mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1272                                    const struct net_device *ol_dev)
1273 {
1274         struct mlxsw_sp_ipip_entry *ipip_entry;
1275
1276         list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1277                             ipip_list_node)
1278                 if (ipip_entry->ol_dev == ol_dev)
1279                         return ipip_entry;
1280
1281         return NULL;
1282 }
1283
/* Find the next IPIP entry after @start whose underlay netdev is @ul_dev.
 * Pass @start as NULL to search from the beginning of the list; pass the
 * previous result to continue an iteration. Returns NULL when exhausted.
 */
static struct mlxsw_sp_ipip_entry *
mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
				   const struct net_device *ul_dev,
				   struct mlxsw_sp_ipip_entry *start)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	/* list_prepare_entry() turns a NULL @start into a cursor positioned
	 * before the first element, so _continue starts from the head.
	 */
	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
					ipip_list_node);
	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
				     ipip_list_node) {
		struct net_device *ipip_ul_dev =
			__mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);

		if (ipip_ul_dev == ul_dev)
			return ipip_entry;
	}

	return NULL;
}
1304
1305 bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1306                                 const struct net_device *dev)
1307 {
1308         return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1309 }
1310
1311 static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1312                                                 const struct net_device *ol_dev,
1313                                                 enum mlxsw_sp_ipip_type ipipt)
1314 {
1315         const struct mlxsw_sp_ipip_ops *ops
1316                 = mlxsw_sp->router->ipip_ops_arr[ipipt];
1317
1318         /* For deciding whether decap should be offloaded, we don't care about
1319          * overlay protocol, so ask whether either one is supported.
1320          */
1321         return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1322                ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1323 }
1324
1325 static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1326                                                 struct net_device *ol_dev)
1327 {
1328         struct mlxsw_sp_ipip_entry *ipip_entry;
1329         enum mlxsw_sp_l3proto ul_proto;
1330         enum mlxsw_sp_ipip_type ipipt;
1331         union mlxsw_sp_l3addr saddr;
1332         u32 ul_tb_id;
1333
1334         mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1335         if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1336                 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1337                 ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1338                 saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1339                 if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1340                                                           saddr, ul_tb_id,
1341                                                           NULL)) {
1342                         ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1343                                                                 ol_dev);
1344                         if (IS_ERR(ipip_entry))
1345                                 return PTR_ERR(ipip_entry);
1346                 }
1347         }
1348
1349         return 0;
1350 }
1351
/* Handle unregistration of a tunnel overlay netdev: drop its IPIP entry,
 * if one is tracked.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1361
/* The tunnel overlay came up: look for its local-address route and promote
 * it to a decap entry, if one exists.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_fib_entry;

	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_fib_entry)
		return;
	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
					  decap_fib_entry);
}
1373
/* Program (or disable, per @enable) the loopback router interface backing
 * an IPIP tunnel via the RITR register. Only an IPv4 underlay is supported;
 * an IPv6 underlay configuration yields -EAFNOSUPPORT.
 */
static int
mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif,
			struct mlxsw_sp_vr *ul_vr, bool enable)
{
	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
	struct mlxsw_sp_rif *rif = &lb_rif->common;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u32 saddr4;

	switch (lb_cf.ul_protocol) {
	case MLXSW_SP_L3_PROTO_IPV4:
		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
				    rif->rif_index, rif->vr_id, rif->dev->mtu);
		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
			    ul_vr->id, saddr4, lb_cf.okey);
		break;

	case MLXSW_SP_L3_PROTO_IPV6:
		return -EAFNOSUPPORT;
	}

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
1400
1401 static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1402                                                  struct net_device *ol_dev)
1403 {
1404         struct mlxsw_sp_ipip_entry *ipip_entry;
1405         struct mlxsw_sp_rif_ipip_lb *lb_rif;
1406         struct mlxsw_sp_vr *ul_vr;
1407         int err = 0;
1408
1409         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1410         if (ipip_entry) {
1411                 lb_rif = ipip_entry->ol_lb;
1412                 ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
1413                 err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
1414                 if (err)
1415                         goto out;
1416                 lb_rif->common.mtu = ol_dev->mtu;
1417         }
1418
1419 out:
1420         return err;
1421 }
1422
/* The overlay netdev went up: forward the event to the IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1432
1433 static void
1434 mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1435                                   struct mlxsw_sp_ipip_entry *ipip_entry)
1436 {
1437         if (ipip_entry->decap_fib_entry)
1438                 mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1439 }
1440
/* The overlay netdev went down: forward the event to the IPIP entry, if any. */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;
	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1450
1451 static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1452                                          struct mlxsw_sp_rif *old_rif,
1453                                          struct mlxsw_sp_rif *new_rif);
/* Replace the loopback RIF backing @ipip_entry with a freshly created one.
 * When @keep_encap is set, next hops using the old loopback are migrated to
 * the new one so encapsulation keeps working across the swap. On failure
 * the entry is left unchanged.
 */
static int
mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_ipip_entry *ipip_entry,
				 bool keep_encap,
				 struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;

	/* Create the replacement RIF before touching the old one, so an
	 * error leaves the old RIF in place.
	 */
	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
						     ipip_entry->ipipt,
						     ipip_entry->ol_dev,
						     extack);
	if (IS_ERR(new_lb_rif))
		return PTR_ERR(new_lb_rif);
	ipip_entry->ol_lb = new_lb_rif;

	if (keep_encap)
		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
					     &new_lb_rif->common);

	mlxsw_sp_rif_destroy(&old_lb_rif->common);

	return 0;
}
1479
1480 static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1481                                         struct mlxsw_sp_rif *rif);
1482
/**
 * __mlxsw_sp_ipip_entry_update_tunnel - Update the offload related to an
 *	IPIP entry.
 * @mlxsw_sp: switch driver instance.
 * @ipip_entry: the tunnel entry to update.
 * @recreate_loopback: recreates the associated loopback RIF.
 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
 *              relevant when recreate_loopback is true.
 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
 *                   is only relevant when recreate_loopback is false.
 * @extack: netlink extended ack for error reporting (may be NULL).
 *
 * This always updates decap: the decap route is demoted for the duration of
 * the update and re-promoted afterwards if the tunnel netdev is up.
 *
 * Return: 0 on success, or a negative errno from the loopback update.
 */
int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_ipip_entry *ipip_entry,
					bool recreate_loopback,
					bool keep_encap,
					bool update_nexthops,
					struct netlink_ext_ack *extack)
{
	int err;

	/* RIFs can't be edited, so to update loopback, we need to destroy and
	 * recreate it. That creates a window of opportunity where RALUE and
	 * RATR registers end up referencing a RIF that's already gone. RATRs
	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
	 * of RALUE, demote the decap route back.
	 */
	if (ipip_entry->decap_fib_entry)
		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);

	if (recreate_loopback) {
		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
						       keep_encap, extack);
		if (err)
			return err;
	} else if (update_nexthops) {
		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
					    &ipip_entry->ol_lb->common);
	}

	/* Re-promote the decap route now that the RIF churn is over. */
	if (ipip_entry->ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);

	return 0;
}
1525
1526 static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1527                                                 struct net_device *ol_dev,
1528                                                 struct netlink_ext_ack *extack)
1529 {
1530         struct mlxsw_sp_ipip_entry *ipip_entry =
1531                 mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1532
1533         if (!ipip_entry)
1534                 return 0;
1535
1536         return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1537                                                    true, false, false, extack);
1538 }
1539
static int
mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_ipip_entry *ipip_entry,
				     struct net_device *ul_dev,
				     struct netlink_ext_ack *extack)
{
	/* Underlay VRF change: recreate the loopback RIF, migrating encap
	 * next hops to it (recreate_loopback=true, keep_encap=true).
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   true, true, false, extack);
}
1549
static int
mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_ipip_entry *ipip_entry,
				    struct net_device *ul_dev)
{
	/* Underlay up: only refresh next hops, keeping the current loopback
	 * RIF (update_nexthops=true).
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1558
static int
mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_ipip_entry *ipip_entry,
				      struct net_device *ul_dev)
{
	/* A down underlay device causes encapsulated packets to not be
	 * forwarded, but decap still works. So refresh next hops without
	 * touching anything else.
	 */
	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
						   false, false, true, NULL);
}
1571
1572 static int
1573 mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1574                                         struct net_device *ol_dev,
1575                                         struct netlink_ext_ack *extack)
1576 {
1577         const struct mlxsw_sp_ipip_ops *ipip_ops;
1578         struct mlxsw_sp_ipip_entry *ipip_entry;
1579         int err;
1580
1581         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1582         if (!ipip_entry)
1583                 /* A change might make a tunnel eligible for offloading, but
1584                  * that is currently not implemented. What falls to slow path
1585                  * stays there.
1586                  */
1587                 return 0;
1588
1589         /* A change might make a tunnel not eligible for offloading. */
1590         if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1591                                                  ipip_entry->ipipt)) {
1592                 mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1593                 return 0;
1594         }
1595
1596         ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1597         err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1598         return err;
1599 }
1600
void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct net_device *ol_dev = ipip_entry->ol_dev;

	/* Simulate a link-down first, so the decap route is demoted before
	 * the entry (and its loopback RIF) is torn down.
	 */
	if (ol_dev->flags & IFF_UP)
		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1610
/* The configuration where several tunnels have the same local address in the
 * same underlay table needs special treatment in the HW. That is currently not
 * implemented in the driver. This function finds and demotes the first tunnel
 * with a given source address, except the one passed in the argument
 * `except'.
 */
1617 bool
1618 mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1619                                      enum mlxsw_sp_l3proto ul_proto,
1620                                      union mlxsw_sp_l3addr saddr,
1621                                      u32 ul_tb_id,
1622                                      const struct mlxsw_sp_ipip_entry *except)
1623 {
1624         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1625
1626         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1627                                  ipip_list_node) {
1628                 if (ipip_entry != except &&
1629                     mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1630                                                       ul_tb_id, ipip_entry)) {
1631                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1632                         return true;
1633                 }
1634         }
1635
1636         return false;
1637 }
1638
1639 static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1640                                                      struct net_device *ul_dev)
1641 {
1642         struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1643
1644         list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1645                                  ipip_list_node) {
1646                 struct net_device *ipip_ul_dev =
1647                         __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1648
1649                 if (ipip_ul_dev == ul_dev)
1650                         mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1651         }
1652 }
1653
/* Notifier dispatch for netdev events on a tunnel's overlay device.
 * Returns 0 or a negative errno to be propagated to the notifier chain.
 */
int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *ol_dev,
				     unsigned long event,
				     struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_REGISTER:
		return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
	case NETDEV_UNREGISTER:
		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_UP:
		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_DOWN:
		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
		return 0;
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* Only a change of VRF (L3 master) binding is of interest. */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
								    ol_dev,
								    extack);
		return 0;
	case NETDEV_CHANGE:
		extack = info->extack;
		return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
							       ol_dev, extack);
	case NETDEV_CHANGEMTU:
		return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
	}
	return 0;
}
1691
/* Handle a netdev @event on underlay device @ul_dev for one IPinIP entry.
 * Returns 0 or a negative errno.
 */
static int
__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_ipip_entry *ipip_entry,
				   struct net_device *ul_dev,
				   unsigned long event,
				   struct netdev_notifier_info *info)
{
	struct netdev_notifier_changeupper_info *chup;
	struct netlink_ext_ack *extack;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		chup = container_of(info, typeof(*chup), info);
		extack = info->extack;
		/* Only a change of VRF (L3 master) binding is of interest. */
		if (netif_is_l3_master(chup->upper_dev))
			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
								    ipip_entry,
								    ul_dev,
								    extack);
		break;

	case NETDEV_UP:
		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
							   ul_dev);
	case NETDEV_DOWN:
		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
							     ipip_entry,
							     ul_dev);
	}
	return 0;
}
1723
1724 int
1725 mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1726                                  struct net_device *ul_dev,
1727                                  unsigned long event,
1728                                  struct netdev_notifier_info *info)
1729 {
1730         struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1731         int err;
1732
1733         while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1734                                                                 ul_dev,
1735                                                                 ipip_entry))) {
1736                 err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1737                                                          ul_dev, event, info);
1738                 if (err) {
1739                         mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1740                                                                  ul_dev);
1741                         return err;
1742                 }
1743         }
1744
1745         return 0;
1746 }
1747
/* Hash table key for neighbour entries: the kernel neighbour pointer. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1751
/* Driver representation of one kernel neighbour offloaded to the device. */
struct mlxsw_sp_neigh_entry {
	struct list_head rif_list_node; /* member of the RIF's neigh_list */
	struct rhash_head ht_node; /* member of router->neigh_ht */
	struct mlxsw_sp_neigh_key key; /* backing kernel neighbour */
	u16 rif; /* RIF index the neighbour is reachable through */
	bool connected; /* entry is currently programmed to HW */
	unsigned char ha[ETH_ALEN]; /* cached hardware (MAC) address */
	struct list_head nexthop_list; /* list of nexthops using
					* this neigh entry
					*/
	struct list_head nexthop_neighs_list_node;
	unsigned int counter_index; /* flow counter, valid iff counter_valid */
	bool counter_valid;
};
1766
/* router->neigh_ht hashes neighbour entries by kernel neighbour pointer. */
static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
	.key_len = sizeof(struct mlxsw_sp_neigh_key),
};
1772
1773 struct mlxsw_sp_neigh_entry *
1774 mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1775                         struct mlxsw_sp_neigh_entry *neigh_entry)
1776 {
1777         if (!neigh_entry) {
1778                 if (list_empty(&rif->neigh_list))
1779                         return NULL;
1780                 else
1781                         return list_first_entry(&rif->neigh_list,
1782                                                 typeof(*neigh_entry),
1783                                                 rif_list_node);
1784         }
1785         if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1786                 return NULL;
1787         return list_next_entry(neigh_entry, rif_list_node);
1788 }
1789
1790 int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1791 {
1792         return neigh_entry->key.n->tbl->family;
1793 }
1794
/* Cached hardware (MAC) address of the neighbour. */
unsigned char *
mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return neigh_entry->ha;
}
1800
1801 u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1802 {
1803         struct neighbour *n;
1804
1805         n = neigh_entry->key.n;
1806         return ntohl(*((__be32 *) n->primary_key));
1807 }
1808
1809 struct in6_addr *
1810 mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1811 {
1812         struct neighbour *n;
1813
1814         n = neigh_entry->key.n;
1815         return (struct in6_addr *) &n->primary_key;
1816 }
1817
1818 int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1819                                struct mlxsw_sp_neigh_entry *neigh_entry,
1820                                u64 *p_counter)
1821 {
1822         if (!neigh_entry->counter_valid)
1823                 return -EINVAL;
1824
1825         return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1826                                          p_counter, NULL);
1827 }
1828
1829 static struct mlxsw_sp_neigh_entry *
1830 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1831                            u16 rif)
1832 {
1833         struct mlxsw_sp_neigh_entry *neigh_entry;
1834
1835         neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1836         if (!neigh_entry)
1837                 return NULL;
1838
1839         neigh_entry->key.n = n;
1840         neigh_entry->rif = rif;
1841         INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1842
1843         return neigh_entry;
1844 }
1845
/* Release an entry allocated by mlxsw_sp_neigh_entry_alloc(). */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1850
/* Link @neigh_entry into the router's neighbour hash table, keyed by the
 * kernel neighbour pointer. Returns 0 or a negative errno from rhashtable.
 */
static int
mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
				      &neigh_entry->ht_node,
				      mlxsw_sp_neigh_ht_params);
}
1859
/* Unlink @neigh_entry from the router's neighbour hash table. */
static void
mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
			    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
			       &neigh_entry->ht_node,
			       mlxsw_sp_neigh_ht_params);
}
1868
1869 static bool
1870 mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
1871                                     struct mlxsw_sp_neigh_entry *neigh_entry)
1872 {
1873         struct devlink *devlink;
1874         const char *table_name;
1875
1876         switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
1877         case AF_INET:
1878                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
1879                 break;
1880         case AF_INET6:
1881                 table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
1882                 break;
1883         default:
1884                 WARN_ON(1);
1885                 return false;
1886         }
1887
1888         devlink = priv_to_devlink(mlxsw_sp->core);
1889         return devlink_dpipe_table_counter_enabled(devlink, table_name);
1890 }
1891
1892 static void
1893 mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
1894                              struct mlxsw_sp_neigh_entry *neigh_entry)
1895 {
1896         if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
1897                 return;
1898
1899         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
1900                 return;
1901
1902         neigh_entry->counter_valid = true;
1903 }
1904
1905 static void
1906 mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
1907                             struct mlxsw_sp_neigh_entry *neigh_entry)
1908 {
1909         if (!neigh_entry->counter_valid)
1910                 return;
1911         mlxsw_sp_flow_counter_free(mlxsw_sp,
1912                                    neigh_entry->counter_index);
1913         neigh_entry->counter_valid = false;
1914 }
1915
/* Create a driver neighbour entry for kernel neighbour @n, insert it into
 * the neighbour hash table and link it to the RIF of the neighbour's
 * netdev. Returns the new entry or an ERR_PTR.
 */
static struct mlxsw_sp_neigh_entry *
mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	/* Without a RIF for the netdev the neighbour can't be offloaded. */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
	if (!rif)
		return ERR_PTR(-EINVAL);

	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
	if (!neigh_entry)
		return ERR_PTR(-ENOMEM);

	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	/* Counter allocation is best-effort; the entry works without one. */
	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);

	return neigh_entry;

err_neigh_entry_insert:
	mlxsw_sp_neigh_entry_free(neigh_entry);
	return ERR_PTR(err);
}
1944
/* Tear down @neigh_entry: unlink from the RIF list and the hash table,
 * free its counter and its memory. Reverses mlxsw_sp_neigh_entry_create().
 */
static void
mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
			     struct mlxsw_sp_neigh_entry *neigh_entry)
{
	list_del(&neigh_entry->rif_list_node);
	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_free(neigh_entry);
}
1954
1955 static struct mlxsw_sp_neigh_entry *
1956 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
1957 {
1958         struct mlxsw_sp_neigh_key key;
1959
1960         key.n = n;
1961         return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
1962                                       &key, mlxsw_sp_neigh_ht_params);
1963 }
1964
/* Pick the neighbour-activity dump interval: the shorter of the ARP and
 * (when IPv6 is enabled) ndisc DELAY_PROBE_TIME values, in milliseconds.
 */
static void
mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
{
	unsigned long interval;

#if IS_ENABLED(CONFIG_IPV6)
	interval = min_t(unsigned long,
			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
#else
	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
#endif
	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
}
1979
/* Process one IPv4 entry from a RAUHTD activity dump: look up the
 * matching kernel neighbour and poke it so the kernel keeps it alive
 * while HW continues to forward to it.
 */
static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int ent_index)
{
	struct net_device *dev;
	struct neighbour *n;
	__be32 dipn;
	u32 dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	/* The kernel ARP table is keyed by network-byte-order address. */
	dipn = htonl(dip);
	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&arp_tbl, &dipn, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n); /* drop the reference taken by neigh_lookup() */
}
2007
#if IS_ENABLED(CONFIG_IPV6)
/* Process one IPv6 entry from a RAUHTD activity dump: look up the
 * matching kernel neighbour and poke it so the kernel keeps it alive
 * while HW continues to forward to it.
 */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	struct net_device *dev;
	struct neighbour *n;
	struct in6_addr dip;
	u16 rif;

	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
					 (char *) &dip);

	if (!mlxsw_sp->router->rifs[rif]) {
		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
		return;
	}

	dev = mlxsw_sp->router->rifs[rif]->dev;
	n = neigh_lookup(&nd_tbl, &dip, dev);
	if (!n)
		return;

	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
	neigh_event_send(n, NULL);
	neigh_release(n); /* drop the reference taken by neigh_lookup() */
}
#else
/* Stub for kernels built without IPv6 support. */
static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
}
#endif
2042
2043 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2044                                                    char *rauhtd_pl,
2045                                                    int rec_index)
2046 {
2047         u8 num_entries;
2048         int i;
2049
2050         num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2051                                                                 rec_index);
2052         /* Hardware starts counting at 0, so add 1. */
2053         num_entries++;
2054
2055         /* Each record consists of several neighbour entries. */
2056         for (i = 0; i < num_entries; i++) {
2057                 int ent_index;
2058
2059                 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2060                 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2061                                                        ent_index);
2062         }
2063
2064 }
2065
/* An IPv6 RAUHTD record carries exactly one neighbour entry. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry. */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
					       rec_index);
}
2074
/* Dispatch one RAUHTD record to the per-address-family handler. */
static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
					      char *rauhtd_pl, int rec_index)
{
	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
	case MLXSW_REG_RAUHTD_TYPE_IPV4:
		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	case MLXSW_REG_RAUHTD_TYPE_IPV6:
		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
						       rec_index);
		break;
	}
}
2089
/* Decide whether the RAUHTD response may have more records pending: the
 * response is completely filled AND its last record is full (an IPv6
 * record always is; an IPv4 record is full when it carries the maximum
 * number of entries).
 */
static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
{
	u8 num_rec, last_rec_index, num_entries;

	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
	last_rec_index = num_rec - 1;

	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
		return false;
	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
	    MLXSW_REG_RAUHTD_TYPE_IPV6)
		return true;

	/* num_entries is reported zero-based, hence the pre-increment. */
	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
								last_rec_index);
	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
		return true;
	return false;
}
2109
/* Repeatedly query the RAUHTD register for active neighbours of @type and
 * feed each returned record to the record processor, until the device
 * indicates the dump is exhausted. Returns 0 or a negative errno.
 */
static int
__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
				       char *rauhtd_pl,
				       enum mlxsw_reg_rauhtd_type type)
{
	int i, num_rec;
	int err;

	/* Make sure the neighbour's netdev isn't removed in the
	 * process.
	 */
	rtnl_lock();
	do {
		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
				      rauhtd_pl);
		if (err) {
			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
			break;
		}
		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
		for (i = 0; i < num_rec; i++)
			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
							  i);
		/* A completely full response suggests more records pend. */
	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
	rtnl_unlock();

	return err;
}
2139
2140 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2141 {
2142         enum mlxsw_reg_rauhtd_type type;
2143         char *rauhtd_pl;
2144         int err;
2145
2146         rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2147         if (!rauhtd_pl)
2148                 return -ENOMEM;
2149
2150         type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2151         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2152         if (err)
2153                 goto out;
2154
2155         type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2156         err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2157 out:
2158         kfree(rauhtd_pl);
2159         return err;
2160 }
2161
/* Keep nexthop neighbours alive: offloaded traffic bypasses the kernel,
 * so without these events the kernel would age the entries out.
 */
static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;

	/* Take RTNL mutex here to prevent lists from changes */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		/* If this neigh have nexthops, make the kernel think this neigh
		 * is active regardless of the traffic.
		 */
		neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();
}
2176
2177 static void
2178 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2179 {
2180         unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2181
2182         mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2183                                msecs_to_jiffies(interval));
2184 }
2185
/* Periodic work: reflect HW neighbour activity into the kernel, refresh
 * nexthop neighbours, then re-arm itself for the configured interval.
 */
static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
{
	struct mlxsw_sp_router *router;
	int err;

	router = container_of(work, struct mlxsw_sp_router,
			      neighs_update.dw.work);
	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
	if (err)
		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");

	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);

	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
}
2201
/* Periodic work: trigger kernel resolution of unresolved nexthop
 * neighbours, then re-arm itself.
 */
static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct mlxsw_sp_router *router;

	router = container_of(work, struct mlxsw_sp_router,
			      nexthop_probe_dw.work);
	/* Iterate over nexthop neighbours, find those who are unresolved and
	 * send arp on them. This solves the chicken-egg problem when
	 * the nexthop wouldn't get offloaded until the neighbor is resolved
	 * but it wouldn't get resolved ever in case traffic is flowing in HW
	 * using different nexthop.
	 *
	 * Take RTNL mutex here to prevent lists from changes.
	 */
	rtnl_lock();
	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
			    nexthop_neighs_list_node)
		if (!neigh_entry->connected)
			neigh_event_send(neigh_entry->key.n, NULL);
	rtnl_unlock();

	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
}
2227
2228 static void
2229 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2230                               struct mlxsw_sp_neigh_entry *neigh_entry,
2231                               bool removing, bool dead);
2232
2233 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2234 {
2235         return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2236                         MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2237 }
2238
/* Program (add or delete) an IPv4 neighbour in the device's RAUHT table,
 * attaching the activity counter when one was allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	u32 dip = ntohl(*((__be32 *) n->primary_key));
	char rauht_pl[MLXSW_REG_RAUHT_LEN];

	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2255
/* Program (add or delete) an IPv6 neighbour in the device's RAUHT table,
 * attaching the activity counter when one was allocated.
 */
static void
mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_neigh_entry *neigh_entry,
				enum mlxsw_reg_rauht_op op)
{
	struct neighbour *n = neigh_entry->key.n;
	char rauht_pl[MLXSW_REG_RAUHT_LEN];
	const char *dip = n->primary_key;

	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
			      dip);
	if (neigh_entry->counter_valid)
		mlxsw_reg_rauht_pack_counter(rauht_pl,
					     neigh_entry->counter_index);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
}
2272
2273 bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2274 {
2275         struct neighbour *n = neigh_entry->key.n;
2276
2277         /* Packets with a link-local destination address are trapped
2278          * after LPM lookup and never reach the neighbour table, so
2279          * there is no need to program such neighbours to the device.
2280          */
2281         if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2282             IPV6_ADDR_LINKLOCAL)
2283                 return true;
2284         return false;
2285 }
2286
2287 static void
2288 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2289                             struct mlxsw_sp_neigh_entry *neigh_entry,
2290                             bool adding)
2291 {
2292         if (!adding && !neigh_entry->connected)
2293                 return;
2294         neigh_entry->connected = adding;
2295         if (neigh_entry->key.n->tbl->family == AF_INET) {
2296                 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2297                                                 mlxsw_sp_rauht_op(adding));
2298         } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2299                 if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2300                         return;
2301                 mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2302                                                 mlxsw_sp_rauht_op(adding));
2303         } else {
2304                 WARN_ON_ONCE(1);
2305         }
2306 }
2307
/* Toggle the activity counter of @neigh_entry and re-program the entry so
 * the new counter binding takes effect in HW.
 * NOTE(review): the entry is re-written as connected unconditionally;
 * presumably callers only invoke this for offloaded entries - confirm.
 */
void
mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry,
				    bool adding)
{
	if (adding)
		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
	else
		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
}
2319
/* Deferred-work context for netevent notifications, which arrive in
 * atomic context and must be handled in process context.
 */
struct mlxsw_sp_netevent_work {
	struct work_struct work;
	struct mlxsw_sp *mlxsw_sp;
	struct neighbour *n; /* only used by the neigh event work */
};
2325
/* Deferred handler for a NETEVENT_NEIGH_UPDATE: snapshot the neighbour's
 * state under its lock, then, under RTNL, create/update/destroy the
 * corresponding driver entry and its dependent nexthops.
 */
static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
{
	struct mlxsw_sp_netevent_work *net_work =
		container_of(work, struct mlxsw_sp_netevent_work, work);
	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n = net_work->n;
	unsigned char ha[ETH_ALEN];
	bool entry_connected;
	u8 nud_state, dead;

	/* If these parameters are changed after we release the lock,
	 * then we are guaranteed to receive another event letting us
	 * know about it.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);

	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	entry_connected = nud_state & NUD_VALID && !dead;
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!entry_connected && !neigh_entry)
		goto out;
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry))
			goto out;
	}

	memcpy(neigh_entry->ha, ha, ETH_ALEN);
	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
				      dead);

	/* A disconnected entry with no nexthop users is of no use - drop it. */
	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

out:
	rtnl_unlock();
	neigh_release(n); /* drop the reference taken by the notifier */
	kfree(net_work);
}
2373
2374 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2375
2376 static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2377 {
2378         struct mlxsw_sp_netevent_work *net_work =
2379                 container_of(work, struct mlxsw_sp_netevent_work, work);
2380         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2381
2382         mlxsw_sp_mp_hash_init(mlxsw_sp);
2383         kfree(net_work);
2384 }
2385
2386 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2387
2388 static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2389 {
2390         struct mlxsw_sp_netevent_work *net_work =
2391                 container_of(work, struct mlxsw_sp_netevent_work, work);
2392         struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2393
2394         __mlxsw_sp_router_init(mlxsw_sp);
2395         kfree(net_work);
2396 }
2397
/* Allocate and queue a work item running @cb for a netevent received in
 * atomic context; hence GFP_ATOMIC. Events from network namespaces other
 * than init_net are ignored.
 */
static int mlxsw_sp_router_schedule_work(struct net *net,
					 struct notifier_block *nb,
					 void (*cb)(struct work_struct *))
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_router *router;

	if (!net_eq(net, &init_net))
		return NOTIFY_DONE;

	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
	if (!net_work)
		return NOTIFY_BAD;

	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
	INIT_WORK(&net_work->work, cb);
	net_work->mlxsw_sp = router->mlxsw_sp;
	mlxsw_core_schedule_work(&net_work->work);
	return NOTIFY_DONE;
}
2418
/* Netevent notifier callback. Runs in atomic context, so any work that
 * needs to sleep (device register access, RTNL) is deferred to process
 * context via work items.
 */
static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
					  unsigned long event, void *ptr)
{
	struct mlxsw_sp_netevent_work *net_work;
	struct mlxsw_sp_port *mlxsw_sp_port;
	struct mlxsw_sp *mlxsw_sp;
	unsigned long interval;
	struct neigh_parms *p;
	struct neighbour *n;

	switch (event) {
	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We don't care about changes in the default table. */
		if (!p->dev || (p->tbl->family != AF_INET &&
				p->tbl->family != AF_INET6))
			return NOTIFY_DONE;

		/* We are in atomic context and can't take RTNL mutex,
		 * so use RCU variant to walk the device chain.
		 */
		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		/* Track the kernel's DELAY_PROBE_TIME as the interval for
		 * the periodic neighbour activity dump.
		 */
		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
		mlxsw_sp->router->neighs_update.interval = interval;

		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;

		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
			return NOTIFY_DONE;

		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
		if (!mlxsw_sp_port)
			return NOTIFY_DONE;

		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
		if (!net_work) {
			mlxsw_sp_port_dev_put(mlxsw_sp_port);
			return NOTIFY_BAD;
		}

		/* Defer the actual neighbour update to process context. */
		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
		net_work->n = n;

		/* Take a reference to ensure the neighbour won't be
		 * destructed until we drop the reference in delayed
		 * work.
		 */
		neigh_clone(n);
		mlxsw_core_schedule_work(&net_work->work);
		mlxsw_sp_port_dev_put(mlxsw_sp_port);
		break;
	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
		return mlxsw_sp_router_schedule_work(ptr, nb,
				mlxsw_sp_router_mp_hash_event_work);

	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
		return mlxsw_sp_router_schedule_work(ptr, nb,
				mlxsw_sp_router_update_priority_work);
	}

	return NOTIFY_DONE;
}
2491
/* Initialize neighbour tracking: the neighbour hash table and the two
 * periodic works driving neighbour activity updates and unresolved
 * nexthop probing. Both works are kicked off immediately.
 */
static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
{
	int err;

	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
			      &mlxsw_sp_neigh_ht_params);
	if (err)
		return err;

	/* Initialize the polling interval according to the default
	 * table.
	 */
	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);

	/* Create the delayed works for the activity_update */
	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
			  mlxsw_sp_router_neighs_update_work);
	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
			  mlxsw_sp_router_probe_unresolved_nexthops);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
	return 0;
}
2515
/* Tear down neighbour tracking. The periodic works are cancelled and
 * flushed before the hash table they operate on is destroyed.
 */
static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
{
	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
}
2522
/* A router interface is going away: un-program and destroy every
 * neighbour entry residing on it. Safe iteration is required because
 * entries are removed from the RIF's list while walking it.
 */
static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;

	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
				 rif_list_node) {
		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
	}
}
2534
/* A nexthop is programmed either as a plain Ethernet adjacency or as an
 * IP-in-IP tunnel adjacency.
 */
enum mlxsw_sp_nexthop_type {
	MLXSW_SP_NEXTHOP_TYPE_ETH,
	MLXSW_SP_NEXTHOP_TYPE_IPIP,
};

/* Key for the nexthop hash table: the kernel's fib_nh pointer. */
struct mlxsw_sp_nexthop_key {
	struct fib_nh *fib_nh;
};
2543
struct mlxsw_sp_nexthop {
	struct list_head neigh_list_node; /* member of neigh entry list */
	struct list_head rif_list_node;
	struct list_head router_list_node;
	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
						* this belongs to
						*/
	struct rhash_head ht_node;
	struct mlxsw_sp_nexthop_key key;
	/* Gateway address, sized for IPv6; IPv4 presumably uses the first
	 * four bytes only - see the ipv6_addr_equal() cast in
	 * mlxsw_sp_nexthop6_group_has_nexthop().
	 */
	unsigned char gw_addr[sizeof(struct in6_addr)];
	int ifindex;
	int nh_weight;
	/* nh_weight divided by the GCD of all offloadable weights in the
	 * group; computed by mlxsw_sp_nexthop_group_normalize().
	 */
	int norm_nh_weight;
	/* Number of consecutive adjacency entries this nexthop occupies
	 * within the group; computed by mlxsw_sp_nexthop_group_rebalance().
	 */
	int num_adj_entries;
	struct mlxsw_sp_rif *rif;
	u8 should_offload:1, /* set indicates this neigh is connected and
			      * should be put to KVD linear area of this group.
			      */
	   offloaded:1, /* set in case the neigh is actually put into
			 * KVD linear area of this group.
			 */
	   update:1; /* set indicates that MAC of this neigh should be
		      * updated in HW
		      */
	enum mlxsw_sp_nexthop_type type;
	/* Which member is valid depends on 'type' above. */
	union {
		struct mlxsw_sp_neigh_entry *neigh_entry;
		struct mlxsw_sp_ipip_entry *ipip_entry;
	};
	unsigned int counter_index;
	bool counter_valid; /* a flow counter is bound to counter_index */
};
2576
struct mlxsw_sp_nexthop_group {
	/* Protocol-specific back-pointer; for IPv4 this is the fib_info
	 * (see mlxsw_sp_nexthop4_group_fi()).
	 */
	void *priv;
	struct rhash_head ht_node;
	struct list_head fib_list; /* list of fib entries that use this group */
	struct neigh_table *neigh_tbl;
	u8 adj_index_valid:1,
	   gateway:1; /* routes using the group use a gateway */
	u32 adj_index; /* base of the group's adjacency entries */
	u16 ecmp_size; /* number of adjacency entries allocated */
	u16 count;     /* number of nexthops in the flexible array below */
	int sum_norm_weight;
	struct mlxsw_sp_nexthop nexthops[0];
#define nh_rif	nexthops[0].rif
};
2591
2592 void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2593                                     struct mlxsw_sp_nexthop *nh)
2594 {
2595         struct devlink *devlink;
2596
2597         devlink = priv_to_devlink(mlxsw_sp->core);
2598         if (!devlink_dpipe_table_counter_enabled(devlink,
2599                                                  MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2600                 return;
2601
2602         if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2603                 return;
2604
2605         nh->counter_valid = true;
2606 }
2607
2608 void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2609                                    struct mlxsw_sp_nexthop *nh)
2610 {
2611         if (!nh->counter_valid)
2612                 return;
2613         mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2614         nh->counter_valid = false;
2615 }
2616
2617 int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2618                                  struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2619 {
2620         if (!nh->counter_valid)
2621                 return -EINVAL;
2622
2623         return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2624                                          p_counter, NULL);
2625 }
2626
2627 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2628                                                struct mlxsw_sp_nexthop *nh)
2629 {
2630         if (!nh) {
2631                 if (list_empty(&router->nexthop_list))
2632                         return NULL;
2633                 else
2634                         return list_first_entry(&router->nexthop_list,
2635                                                 typeof(*nh), router_list_node);
2636         }
2637         if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2638                 return NULL;
2639         return list_next_entry(nh, router_list_node);
2640 }
2641
/* Report whether this nexthop is currently programmed in the device's
 * adjacency table.
 */
bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
{
	return nh->offloaded;
}
2646
2647 unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2648 {
2649         if (!nh->offloaded)
2650                 return NULL;
2651         return nh->neigh_entry->ha;
2652 }
2653
2654 int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2655                              u32 *p_adj_size, u32 *p_adj_hash_index)
2656 {
2657         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2658         u32 adj_hash_index = 0;
2659         int i;
2660
2661         if (!nh->offloaded || !nh_grp->adj_index_valid)
2662                 return -EINVAL;
2663
2664         *p_adj_index = nh_grp->adj_index;
2665         *p_adj_size = nh_grp->ecmp_size;
2666
2667         for (i = 0; i < nh_grp->count; i++) {
2668                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2669
2670                 if (nh_iter == nh)
2671                         break;
2672                 if (nh_iter->offloaded)
2673                         adj_hash_index += nh_iter->num_adj_entries;
2674         }
2675
2676         *p_adj_hash_index = adj_hash_index;
2677         return 0;
2678 }
2679
/* Return the router interface associated with the nexthop. */
struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
{
	return nh->rif;
}
2684
2685 bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2686 {
2687         struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2688         int i;
2689
2690         for (i = 0; i < nh_grp->count; i++) {
2691                 struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2692
2693                 if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2694                         return true;
2695         }
2696         return false;
2697 }
2698
/* For IPv4 groups the private pointer holds the kernel fib_info the
 * group was created from.
 */
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->priv;
}
2704
/* Lookup key for the nexthop group hash table. 'proto' selects which
 * union member is valid.
 */
struct mlxsw_sp_nexthop_group_cmp_arg {
	enum mlxsw_sp_l3proto proto;
	union {
		struct fib_info *fi;
		struct mlxsw_sp_fib6_entry *fib6_entry;
	};
};
2712
2713 static bool
2714 mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2715                                     const struct in6_addr *gw, int ifindex,
2716                                     int weight)
2717 {
2718         int i;
2719
2720         for (i = 0; i < nh_grp->count; i++) {
2721                 const struct mlxsw_sp_nexthop *nh;
2722
2723                 nh = &nh_grp->nexthops[i];
2724                 if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2725                     ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2726                         return true;
2727         }
2728
2729         return false;
2730 }
2731
2732 static bool
2733 mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2734                             const struct mlxsw_sp_fib6_entry *fib6_entry)
2735 {
2736         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2737
2738         if (nh_grp->count != fib6_entry->nrt6)
2739                 return false;
2740
2741         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2742                 struct in6_addr *gw;
2743                 int ifindex, weight;
2744
2745                 ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
2746                 weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
2747                 gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
2748                 if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2749                                                          weight))
2750                         return false;
2751         }
2752
2753         return true;
2754 }
2755
/* rhashtable compare callback; returns 0 on match. The key is a
 * mlxsw_sp_nexthop_group_cmp_arg, the stored object a nexthop group.
 */
static int
mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		/* IPv4 groups are keyed by the fib_info pointer itself. */
		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
	case MLXSW_SP_L3_PROTO_IPV6:
		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
						    cmp_arg->fib6_entry);
	default:
		WARN_ON(1);
		return 1;
	}
}
2773
/* Group type follows the neighbour table family: AF_INET or AF_INET6. */
static int
mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
{
	return nh_grp->neigh_tbl->family;
}
2779
/* rhashtable object hash: must produce the same value that
 * mlxsw_sp_nexthop_group_hash() produces for the corresponding key.
 */
static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group *nh_grp = data;
	const struct mlxsw_sp_nexthop *nh;
	struct fib_info *fi;
	unsigned int val;
	int i;

	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
	case AF_INET:
		/* Hash the fib_info pointer value, not its contents. */
		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
		return jhash(&fi, sizeof(fi), seed);
	case AF_INET6:
		/* Nexthop count XORed with every nexthop's ifindex;
		 * mirrored by mlxsw_sp_nexthop6_group_hash().
		 */
		val = nh_grp->count;
		for (i = 0; i < nh_grp->count; i++) {
			nh = &nh_grp->nexthops[i];
			val ^= nh->ifindex;
		}
		return jhash(&val, sizeof(val), seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2804
2805 static u32
2806 mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2807 {
2808         unsigned int val = fib6_entry->nrt6;
2809         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2810         struct net_device *dev;
2811
2812         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2813                 dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
2814                 val ^= dev->ifindex;
2815         }
2816
2817         return jhash(&val, sizeof(val), seed);
2818 }
2819
/* rhashtable key hash; must mirror mlxsw_sp_nexthop_group_hash_obj(). */
static u32
mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
{
	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;

	switch (cmp_arg->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
	case MLXSW_SP_L3_PROTO_IPV6:
		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
	default:
		WARN_ON(1);
		return 0;
	}
}
2835
/* The group table is looked up with a mlxsw_sp_nexthop_group_cmp_arg key
 * rather than a stored object, so separate key/object hash functions and
 * a custom compare function are required.
 */
static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
	.hashfn	     = mlxsw_sp_nexthop_group_hash,
	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
};
2842
2843 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2844                                          struct mlxsw_sp_nexthop_group *nh_grp)
2845 {
2846         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2847             !nh_grp->gateway)
2848                 return 0;
2849
2850         return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
2851                                       &nh_grp->ht_node,
2852                                       mlxsw_sp_nexthop_group_ht_params);
2853 }
2854
2855 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
2856                                           struct mlxsw_sp_nexthop_group *nh_grp)
2857 {
2858         if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2859             !nh_grp->gateway)
2860                 return;
2861
2862         rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
2863                                &nh_grp->ht_node,
2864                                mlxsw_sp_nexthop_group_ht_params);
2865 }
2866
2867 static struct mlxsw_sp_nexthop_group *
2868 mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
2869                                struct fib_info *fi)
2870 {
2871         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2872
2873         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
2874         cmp_arg.fi = fi;
2875         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2876                                       &cmp_arg,
2877                                       mlxsw_sp_nexthop_group_ht_params);
2878 }
2879
2880 static struct mlxsw_sp_nexthop_group *
2881 mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
2882                                struct mlxsw_sp_fib6_entry *fib6_entry)
2883 {
2884         struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
2885
2886         cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
2887         cmp_arg.fib6_entry = fib6_entry;
2888         return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
2889                                       &cmp_arg,
2890                                       mlxsw_sp_nexthop_group_ht_params);
2891 }
2892
/* Nexthops are hashed by their mlxsw_sp_nexthop_key (the kernel's
 * fib_nh pointer).
 */
static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
};
2898
/* Insert a nexthop into the nexthop hash table. */
static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
}
2905
/* Remove a nexthop from the nexthop hash table. */
static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop *nh)
{
	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
			       mlxsw_sp_nexthop_ht_params);
}
2912
/* Look up a nexthop by its key (the kernel fib_nh pointer). */
static struct mlxsw_sp_nexthop *
mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
			struct mlxsw_sp_nexthop_key key)
{
	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
				      mlxsw_sp_nexthop_ht_params);
}
2920
/* Use the RALEU register to re-point all routes in one virtual router
 * that reference the adjacency block [adj_index, adj_index + ecmp_size)
 * at the new block instead.
 */
static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
					     const struct mlxsw_sp_fib *fib,
					     u32 adj_index, u16 ecmp_size,
					     u32 new_adj_index,
					     u16 new_ecmp_size)
{
	char raleu_pl[MLXSW_REG_RALEU_LEN];

	mlxsw_reg_raleu_pack(raleu_pl,
			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
			     new_ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
}
2935
/* Re-point every FIB that has entries using this group from the old
 * adjacency block to the group's current one.
 */
static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_nexthop_group *nh_grp,
					  u32 old_adj_index, u16 old_ecmp_size)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_fib *fib = NULL;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		/* Skip consecutive entries belonging to the same FIB; it
		 * was already updated. NOTE(review): this assumes entries
		 * of the same FIB are adjacent in the list - a repeated
		 * update would be redundant but harmless.
		 */
		if (fib == fib_entry->fib_node->fib)
			continue;
		fib = fib_entry->fib_node->fib;
		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
							old_adj_index,
							old_ecmp_size,
							nh_grp->adj_index,
							nh_grp->ecmp_size);
		if (err)
			return err;
	}
	return 0;
}
2958
2959 static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2960                                      struct mlxsw_sp_nexthop *nh)
2961 {
2962         struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
2963         char ratr_pl[MLXSW_REG_RATR_LEN];
2964
2965         mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
2966                             true, MLXSW_REG_RATR_TYPE_ETHERNET,
2967                             adj_index, neigh_entry->rif);
2968         mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
2969         if (nh->counter_valid)
2970                 mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
2971         else
2972                 mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
2973
2974         return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
2975 }
2976
2977 int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
2978                             struct mlxsw_sp_nexthop *nh)
2979 {
2980         int i;
2981
2982         for (i = 0; i < nh->num_adj_entries; i++) {
2983                 int err;
2984
2985                 err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
2986                 if (err)
2987                         return err;
2988         }
2989
2990         return 0;
2991 }
2992
2993 static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
2994                                           u32 adj_index,
2995                                           struct mlxsw_sp_nexthop *nh)
2996 {
2997         const struct mlxsw_sp_ipip_ops *ipip_ops;
2998
2999         ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3000         return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3001 }
3002
3003 static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3004                                         u32 adj_index,
3005                                         struct mlxsw_sp_nexthop *nh)
3006 {
3007         int i;
3008
3009         for (i = 0; i < nh->num_adj_entries; i++) {
3010                 int err;
3011
3012                 err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3013                                                      nh);
3014                 if (err)
3015                         return err;
3016         }
3017
3018         return 0;
3019 }
3020
3021 static int
3022 mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3023                               struct mlxsw_sp_nexthop_group *nh_grp,
3024                               bool reallocate)
3025 {
3026         u32 adj_index = nh_grp->adj_index; /* base */
3027         struct mlxsw_sp_nexthop *nh;
3028         int i;
3029         int err;
3030
3031         for (i = 0; i < nh_grp->count; i++) {
3032                 nh = &nh_grp->nexthops[i];
3033
3034                 if (!nh->should_offload) {
3035                         nh->offloaded = 0;
3036                         continue;
3037                 }
3038
3039                 if (nh->update || reallocate) {
3040                         switch (nh->type) {
3041                         case MLXSW_SP_NEXTHOP_TYPE_ETH:
3042                                 err = mlxsw_sp_nexthop_update
3043                                             (mlxsw_sp, adj_index, nh);
3044                                 break;
3045                         case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3046                                 err = mlxsw_sp_nexthop_ipip_update
3047                                             (mlxsw_sp, adj_index, nh);
3048                                 break;
3049                         }
3050                         if (err)
3051                                 return err;
3052                         nh->update = 0;
3053                         nh->offloaded = 1;
3054                 }
3055                 adj_index += nh->num_adj_entries;
3056         }
3057         return 0;
3058 }
3059
3060 static bool
3061 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3062                                  const struct mlxsw_sp_fib_entry *fib_entry);
3063
/* Re-write the FIB entries using this group so they reference its
 * current adjacency index. Only the first entry of each FIB node is
 * programmed in the device, so the others are skipped.
 */
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
		if (err)
			return err;
	}
	return 0;
}
3081
3082 static void
3083 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3084                                    enum mlxsw_reg_ralue_op op, int err);
3085
/* Refresh the offload indication of the FIB entries using this group,
 * again considering only the first entry of each FIB node (the one that
 * is actually programmed).
 */
static void
mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
{
	enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
	struct mlxsw_sp_fib_entry *fib_entry;

	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
		if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
						      fib_entry))
			continue;
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
	}
}
3099
3100 static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3101 {
3102         /* Valid sizes for an adjacency group are:
3103          * 1-64, 512, 1024, 2048 and 4096.
3104          */
3105         if (*p_adj_grp_size <= 64)
3106                 return;
3107         else if (*p_adj_grp_size <= 512)
3108                 *p_adj_grp_size = 512;
3109         else if (*p_adj_grp_size <= 1024)
3110                 *p_adj_grp_size = 1024;
3111         else if (*p_adj_grp_size <= 2048)
3112                 *p_adj_grp_size = 2048;
3113         else
3114                 *p_adj_grp_size = 4096;
3115 }
3116
3117 static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3118                                              unsigned int alloc_size)
3119 {
3120         if (alloc_size >= 4096)
3121                 *p_adj_grp_size = 4096;
3122         else if (alloc_size >= 2048)
3123                 *p_adj_grp_size = 2048;
3124         else if (alloc_size >= 1024)
3125                 *p_adj_grp_size = 1024;
3126         else if (alloc_size >= 512)
3127                 *p_adj_grp_size = 512;
3128 }
3129
/* Translate the requested adjacency group size into one the device can
 * actually provide, given current KVD linear usage.
 */
static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
				     u16 *p_adj_grp_size)
{
	unsigned int alloc_size;
	int err;

	/* Round up the requested group size to the next size supported
	 * by the device and make sure the request can be satisfied.
	 */
	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
					      *p_adj_grp_size, &alloc_size);
	if (err)
		return err;
	/* It is possible the allocation results in more allocated
	 * entries than requested. Try to use as much of them as
	 * possible.
	 */
	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);

	return 0;
}
3153
/* Normalize the weights of the offloadable nexthops by dividing them by
 * their greatest common divisor, and record the sum of the normalized
 * weights. A sum of zero indicates no nexthop can be offloaded.
 */
static void
mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int i, g = 0, sum_norm_weight = 0;
	struct mlxsw_sp_nexthop *nh;

	/* First pass: GCD of all offloadable weights. */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		if (g > 0)
			g = gcd(nh->nh_weight, g);
		else
			g = nh->nh_weight;
	}

	/* Second pass: divide each weight by the GCD. If no nexthop was
	 * offloadable, g is still zero, but this loop then never reaches
	 * the division.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (!nh->should_offload)
			continue;
		nh->norm_nh_weight = nh->nh_weight / g;
		sum_norm_weight += nh->norm_nh_weight;
	}

	nh_grp->sum_norm_weight = sum_norm_weight;
}
3182
/* Distribute the group's ecmp_size adjacency entries among the
 * offloadable nexthops in proportion to their normalized weights. Using
 * a running rounded upper bound guarantees the per-nexthop entry counts
 * sum exactly to ecmp_size.
 */
static void
mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
{
	int total = nh_grp->sum_norm_weight;
	u16 ecmp_size = nh_grp->ecmp_size;
	int i, weight = 0, lower_bound = 0;

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
		int upper_bound;

		if (!nh->should_offload)
			continue;
		weight += nh->norm_nh_weight;
		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
		nh->num_adj_entries = upper_bound - lower_bound;
		lower_bound = upper_bound;
	}
}
3202
/* Re-program the adjacency entries of a nexthop group after the offload
 * state of one of its nexthops may have changed.
 *
 * If the set of offloaded nexthops changed, a new adjacency group of a
 * suitable ECMP size is allocated in the KVD linear area, the nexthops
 * are written to it and the FIB entries using the group are either
 * mass-updated to the new adjacency index or rewritten. On any failure
 * the group falls back to trapping packets to the CPU so traffic keeps
 * flowing through the kernel.
 */
static void
mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_nexthop_group *nh_grp)
{
	u16 ecmp_size, old_ecmp_size;
	struct mlxsw_sp_nexthop *nh;
	bool offload_change = false;
	u32 adj_index;
	bool old_adj_index_valid;
	u32 old_adj_index;
	int i;
	int err;

	/* Non-gateway groups have no adjacency entries; just sync the
	 * FIB entries that use them.
	 */
	if (!nh_grp->gateway) {
		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		return;
	}

	/* Detect nexthops whose desired offload state diverged from
	 * what is programmed, marking newly-offloadable ones for update.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];

		if (nh->should_offload != nh->offloaded) {
			offload_change = true;
			if (nh->should_offload)
				nh->update = 1;
		}
	}
	if (!offload_change) {
		/* Nothing was added or removed, so no need to reallocate. Just
		 * update MAC on existing adjacency indexes.
		 */
		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
			goto set_trap;
		}
		return;
	}
	mlxsw_sp_nexthop_group_normalize(nh_grp);
	if (!nh_grp->sum_norm_weight)
		/* No neigh of this group is connected so we just set
		 * the trap and let everthing flow through kernel.
		 */
		goto set_trap;

	ecmp_size = nh_grp->sum_norm_weight;
	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
	if (err)
		/* No valid allocation size available. */
		goto set_trap;

	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				  ecmp_size, &adj_index);
	if (err) {
		/* We ran out of KVD linear space, just set the
		 * trap and let everything flow through kernel.
		 */
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
		goto set_trap;
	}
	/* Remember the old adjacency group so it can be released (or
	 * restored by the trap path) after the switch-over.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	old_adj_index = nh_grp->adj_index;
	old_ecmp_size = nh_grp->ecmp_size;
	nh_grp->adj_index_valid = 1;
	nh_grp->adj_index = adj_index;
	nh_grp->ecmp_size = ecmp_size;
	/* Distribute the entries among the nexthops and write them out. */
	mlxsw_sp_nexthop_group_rebalance(nh_grp);
	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
		goto set_trap;
	}

	if (!old_adj_index_valid) {
		/* The trap was set for fib entries, so we have to call
		 * fib entry update to unset it and use adjacency index.
		 */
		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
		if (err) {
			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
			goto set_trap;
		}
		return;
	}

	/* Re-point all FIB entries from the old adjacency group to the
	 * new one, then release the old group.
	 */
	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
					     old_adj_index, old_ecmp_size);
	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
			   old_ecmp_size, old_adj_index);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
		goto set_trap;
	}

	/* Offload state within the group changed, so update the flags. */
	mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);

	return;

set_trap:
	/* Fallback: invalidate the adjacency index, clear per-nexthop
	 * offload state, rewrite the FIB entries to trap to the CPU and
	 * free the adjacency group if one was allocated.
	 */
	old_adj_index_valid = nh_grp->adj_index_valid;
	nh_grp->adj_index_valid = 0;
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		nh->offloaded = 0;
	}
	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
	if (err)
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
	if (old_adj_index_valid)
		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
				   nh_grp->ecmp_size, nh_grp->adj_index);
}
3316
3317 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3318                                             bool removing)
3319 {
3320         if (!removing)
3321                 nh->should_offload = 1;
3322         else
3323                 nh->should_offload = 0;
3324         nh->update = 1;
3325 }
3326
/* The kernel declared the neighbour used by these nexthops dead.
 * Replace it with a live (possibly newly created) neighbour for the
 * same gateway address, re-key the driver's neigh entry and refresh the
 * affected nexthop groups.
 *
 * Returns 0 on success or a negative errno; on failure the old
 * neighbour is restored in the neigh entry.
 */
static int
mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
				    struct mlxsw_sp_neigh_entry *neigh_entry)
{
	struct neighbour *n, *old_n = neigh_entry->key.n;
	struct mlxsw_sp_nexthop *nh;
	bool entry_connected;
	u8 nud_state, dead;
	int err;

	/* All nexthops on the list resolve the same gateway; use the
	 * first one to look up a replacement neighbour.
	 */
	nh = list_first_entry(&neigh_entry->nexthop_list,
			      struct mlxsw_sp_nexthop, neigh_list_node);

	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		/* Kick off resolution of the freshly created neighbour. */
		neigh_event_send(n, NULL);
	}

	/* Re-key the neigh entry in the hash table under the new
	 * neighbour.
	 */
	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
	neigh_entry->key.n = n;
	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	if (err)
		goto err_neigh_entry_insert;

	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	entry_connected = nud_state & NUD_VALID && !dead;

	/* Swap each nexthop's neighbour reference from the old to the
	 * new neighbour and re-evaluate its offload state.
	 */
	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		neigh_release(old_n);
		neigh_clone(n);
		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}

	/* Drop the reference taken by neigh_lookup()/neigh_create(). */
	neigh_release(n);

	return 0;

err_neigh_entry_insert:
	neigh_entry->key.n = old_n;
	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
	neigh_release(n);
	return err;
}
3379
/* Propagate a neighbour state change to all nexthops using it.
 *
 * @removing: the neighbour can no longer be used for forwarding.
 * @dead: the kernel destroyed the neighbour; try to replace it with a
 *	  live one instead of merely un-offloading the nexthops.
 */
static void
mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
			      struct mlxsw_sp_neigh_entry *neigh_entry,
			      bool removing, bool dead)
{
	struct mlxsw_sp_nexthop *nh;

	/* No nexthop uses this neighbour; nothing to update. */
	if (list_empty(&neigh_entry->nexthop_list))
		return;

	if (dead) {
		int err;

		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
							  neigh_entry);
		if (err)
			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
		return;
	}

	list_for_each_entry(nh, &neigh_entry->nexthop_list,
			    neigh_list_node) {
		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3406
3407 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3408                                       struct mlxsw_sp_rif *rif)
3409 {
3410         if (nh->rif)
3411                 return;
3412
3413         nh->rif = rif;
3414         list_add(&nh->rif_list_node, &rif->nexthop_list);
3415 }
3416
3417 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3418 {
3419         if (!nh->rif)
3420                 return;
3421
3422         list_del(&nh->rif_list_node);
3423         nh->rif = NULL;
3424 }
3425
/* Bind a nexthop to the neighbour entry of its gateway, creating the
 * neighbour and the driver's neigh entry if needed, and derive the
 * nexthop's initial offload state from the neighbour's NUD state.
 *
 * Returns 0 on success (including when the nexthop needs no neighbour)
 * or a negative errno.
 */
static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry;
	struct neighbour *n;
	u8 nud_state, dead;
	int err;

	/* Only gateway groups resolve neighbours; skip if already bound. */
	if (!nh->nh_grp->gateway || nh->neigh_entry)
		return 0;

	/* Take a reference of neigh here ensuring that neigh would
	 * not be destructed before the nexthop entry is finished.
	 * The reference is taken either in neigh_lookup() or
	 * in neigh_create() in case n is not found.
	 */
	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
	if (!n) {
		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
				 nh->rif->dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
		/* Trigger resolution of the new neighbour. */
		neigh_event_send(n, NULL);
	}
	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
	if (!neigh_entry) {
		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
		if (IS_ERR(neigh_entry)) {
			err = -EINVAL;
			goto err_neigh_entry_create;
		}
	}

	/* If that is the first nexthop connected to that neigh, add to
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
			      &mlxsw_sp->router->nexthop_neighs_list);

	nh->neigh_entry = neigh_entry;
	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
	read_lock_bh(&n->lock);
	nud_state = n->nud_state;
	dead = n->dead;
	read_unlock_bh(&n->lock);
	/* Offload only if the neighbour is valid and alive. */
	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));

	return 0;

err_neigh_entry_create:
	neigh_release(n);
	return err;
}
3480
/* Undo mlxsw_sp_nexthop_neigh_init(): unlink the nexthop from its
 * neighbour entry, destroy the entry when it becomes unused and drop
 * the neighbour reference taken at init time.
 */
static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
	struct neighbour *n;

	if (!neigh_entry)
		return;
	n = neigh_entry->key.n;

	/* Mark the nexthop for removal from the adjacency table. */
	__mlxsw_sp_nexthop_neigh_update(nh, true);
	list_del(&nh->neigh_list_node);
	nh->neigh_entry = NULL;

	/* If that is the last nexthop connected to that neigh, remove from
	 * nexthop_neighs_list
	 */
	if (list_empty(&neigh_entry->nexthop_list))
		list_del(&neigh_entry->nexthop_neighs_list_node);

	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);

	neigh_release(n);
}
3506
3507 static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3508 {
3509         struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3510
3511         return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3512 }
3513
3514 static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3515                                        struct mlxsw_sp_nexthop *nh,
3516                                        struct mlxsw_sp_ipip_entry *ipip_entry)
3517 {
3518         bool removing;
3519
3520         if (!nh->nh_grp->gateway || nh->ipip_entry)
3521                 return;
3522
3523         nh->ipip_entry = ipip_entry;
3524         removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3525         __mlxsw_sp_nexthop_neigh_update(nh, removing);
3526         mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3527 }
3528
3529 static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3530                                        struct mlxsw_sp_nexthop *nh)
3531 {
3532         struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3533
3534         if (!ipip_entry)
3535                 return;
3536
3537         __mlxsw_sp_nexthop_neigh_update(nh, true);
3538         nh->ipip_entry = NULL;
3539 }
3540
3541 static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3542                                         const struct fib_nh *fib_nh,
3543                                         enum mlxsw_sp_ipip_type *p_ipipt)
3544 {
3545         struct net_device *dev = fib_nh->nh_dev;
3546
3547         return dev &&
3548                fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3549                mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3550 }
3551
/* Tear down the type-specific state of a nexthop. Note the order
 * differs per type: Ethernet nexthops release their neighbour before
 * the RIF, while IP-in-IP nexthops release the RIF before the tunnel
 * binding.
 */
static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh)
{
	switch (nh->type) {
	case MLXSW_SP_NEXTHOP_TYPE_ETH:
		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_rif_fini(nh);
		break;
	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
		mlxsw_sp_nexthop_rif_fini(nh);
		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
		break;
	}
}
3566
/* Initialize the type-specific state of an IPv4 nexthop: bind it to an
 * IP-in-IP tunnel when its egress device is an offloadable tunnel,
 * otherwise treat it as an Ethernet nexthop and resolve its RIF and
 * neighbour.
 *
 * Returns 0 on success (including when no RIF exists yet, in which
 * case the nexthop simply stays un-offloaded) or a negative errno.
 */
static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_nexthop *nh,
				       struct fib_nh *fib_nh)
{
	const struct mlxsw_sp_ipip_ops *ipip_ops;
	struct net_device *dev = fib_nh->nh_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct mlxsw_sp_rif *rif;
	int err;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
	if (ipip_entry) {
		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
		if (ipip_ops->can_offload(mlxsw_sp, dev,
					  MLXSW_SP_L3_PROTO_IPV4)) {
			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
			return 0;
		}
	}

	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;

	mlxsw_sp_nexthop_rif_init(nh, rif);
	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
	if (err)
		goto err_neigh_init;

	return 0;

err_neigh_init:
	mlxsw_sp_nexthop_rif_fini(nh);
	return err;
}
3604
/* IPv4 wrapper around the generic type teardown, kept for symmetry
 * with mlxsw_sp_nexthop4_type_init().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3610
/* Initialize an IPv4 nexthop from its kernel fib_nh: record its weight
 * and gateway, insert it into the router's nexthop hash table and, when
 * the kernel considers the nexthop usable, set up its type-specific
 * state.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  struct fib_nh *fib_nh)
{
	struct net_device *dev = fib_nh->nh_dev;
	struct in_device *in_dev;
	int err;

	nh->nh_grp = nh_grp;
	nh->key.fib_nh = fib_nh;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->nh_weight = fib_nh->nh_weight;
#else
	/* Without multipath support all nexthops weigh the same. */
	nh->nh_weight = 1;
#endif
	memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw));
	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
	if (err)
		return err;

	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	if (!dev)
		return 0;

	/* Honour ignore_routes_with_linkdown: a link-down nexthop is
	 * registered but not offloaded.
	 */
	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
	    fib_nh->nh_flags & RTNH_F_LINKDOWN)
		return 0;

	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
	if (err)
		goto err_nexthop_neigh_init;

	return 0;

err_nexthop_neigh_init:
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
	return err;
}
3653
/* Undo mlxsw_sp_nexthop4_init() in reverse order. */
static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
3662
/* Handle a FIB nexthop add/delete notification for an IPv4 nexthop
 * already tracked by the driver, then refresh its group.
 */
static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
				    unsigned long event, struct fib_nh *fib_nh)
{
	struct mlxsw_sp_nexthop_key key;
	struct mlxsw_sp_nexthop *nh;

	/* After the abort mechanism kicked in, routing is handled by
	 * the kernel; ignore further events.
	 */
	if (mlxsw_sp->router->aborted)
		return;

	key.fib_nh = fib_nh;
	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
	if (WARN_ON_ONCE(!nh))
		return;

	switch (event) {
	case FIB_EVENT_NH_ADD:
		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
		break;
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
		break;
	}

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
}
3688
/* Re-evaluate the offload state of every nexthop egressing via a RIF
 * and refresh the affected groups.
 */
static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh;
	bool removing;

	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
		switch (nh->type) {
		case MLXSW_SP_NEXTHOP_TYPE_ETH:
			removing = false;
			break;
		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
			/* Tunnel nexthops go down with their underlay. */
			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
			break;
		default:
			WARN_ON(1);
			continue;
		}

		__mlxsw_sp_nexthop_neigh_update(nh, removing);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3712
/* Move all nexthops from @old_rif to @new_rif and re-evaluate their
 * offload state on the new interface.
 */
static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_rif *old_rif,
					 struct mlxsw_sp_rif *new_rif)
{
	struct mlxsw_sp_nexthop *nh;

	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
		nh->rif = new_rif;
	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
}
3724
/* The RIF is going away: tear down every nexthop using it and refresh
 * the affected groups so they stop referencing it.
 */
static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					   struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_nexthop *nh, *tmp;

	/* _safe variant: type_fini unlinks nh from rif->nexthop_list. */
	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
	}
}
3735
3736 static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3737                                    const struct fib_info *fi)
3738 {
3739         return fi->fib_nh->nh_scope == RT_SCOPE_LINK ||
3740                mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3741 }
3742
/* Create a nexthop group mirroring the kernel's fib_info: allocate the
 * group with one inline nexthop per fib_nh, initialize each nexthop,
 * insert the group into the hash table and program it to the device.
 *
 * Holds a reference on @fi for the group's lifetime. Returns the new
 * group or an ERR_PTR().
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_nexthop *nh;
	struct fib_nh *fib_nh;
	size_t alloc_size;
	int i;
	int err;

	/* The nexthop array is allocated inline after the group. */
	alloc_size = sizeof(*nh_grp) +
		     fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	nh_grp->priv = fi;
	INIT_LIST_HEAD(&nh_grp->fib_list);
	nh_grp->neigh_tbl = &arp_tbl;

	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
	nh_grp->count = fi->fib_nhs;
	fib_info_hold(fi);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		fib_nh = &fi->fib_nh[i];
		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
		if (err)
			goto err_nexthop4_init;
	}
	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop4_init:
	/* Unwind only the nexthops initialized so far. */
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	fib_info_put(fi);
	kfree(nh_grp);
	return ERR_PTR(err);
}
3788
/* Destroy a nexthop group: remove it from the hash table, tear down all
 * nexthops, let the final refresh release the adjacency entries and
 * drop the fib_info reference taken at creation.
 */
static void
mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_nexthop_group *nh_grp)
{
	struct mlxsw_sp_nexthop *nh;
	int i;

	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
	for (i = 0; i < nh_grp->count; i++) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
	}
	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	/* The refresh above must have freed the adjacency group. */
	WARN_ON_ONCE(nh_grp->adj_index_valid);
	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
	kfree(nh_grp);
}
3806
3807 static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3808                                        struct mlxsw_sp_fib_entry *fib_entry,
3809                                        struct fib_info *fi)
3810 {
3811         struct mlxsw_sp_nexthop_group *nh_grp;
3812
3813         nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3814         if (!nh_grp) {
3815                 nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3816                 if (IS_ERR(nh_grp))
3817                         return PTR_ERR(nh_grp);
3818         }
3819         list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3820         fib_entry->nh_group = nh_grp;
3821         return 0;
3822 }
3823
3824 static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3825                                         struct mlxsw_sp_fib_entry *fib_entry)
3826 {
3827         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3828
3829         list_del(&fib_entry->nexthop_group_node);
3830         if (!list_empty(&nh_grp->fib_list))
3831                 return;
3832         mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3833 }
3834
3835 static bool
3836 mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3837 {
3838         struct mlxsw_sp_fib4_entry *fib4_entry;
3839
3840         fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3841                                   common);
3842         return !fib4_entry->tos;
3843 }
3844
/* Decide whether a FIB entry can forward in hardware. IPv4 entries
 * must pass the protocol-specific check first; then remote entries
 * need a valid adjacency group, local entries a RIF, and decap entries
 * are always offloadable.
 */
static bool
mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;

	switch (fib_entry->fib_node->fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
			return false;
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		break;
	}

	switch (fib_entry->type) {
	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
		return !!nh_group->adj_index_valid;
	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
		return !!nh_group->nh_rif;
	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
		return true;
	default:
		return false;
	}
}
3870
3871 static struct mlxsw_sp_nexthop *
3872 mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3873                      const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
3874 {
3875         int i;
3876
3877         for (i = 0; i < nh_grp->count; i++) {
3878                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3879                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3880
3881                 if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
3882                     ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
3883                                     &rt->fib6_nh.nh_gw))
3884                         return nh;
3885                 continue;
3886         }
3887
3888         return NULL;
3889 }
3890
/* Reflect the entry's hardware offload state in the kernel's
 * per-nexthop RTNH_F_OFFLOAD flags so user space can observe it.
 */
static void
mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
	int i;

	/* Local and decap entries do not forward via the nexthops; mark
	 * only the first nexthop as offloaded.
	 */
	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
	    fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
		nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	for (i = 0; i < nh_grp->count; i++) {
		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];

		if (nh->offloaded)
			nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
		else
			nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3912
3913 static void
3914 mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3915 {
3916         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3917         int i;
3918
3919         if (!list_is_singular(&nh_grp->fib_list))
3920                 return;
3921
3922         for (i = 0; i < nh_grp->count; i++) {
3923                 struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3924
3925                 nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
3926         }
3927 }
3928
/* Reflect the entry's hardware offload state in the kernel's IPv6
 * routes: local entries mark their first route, other entries mark
 * each route whose matching nexthop is actually offloaded.
 */
static void
mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
				  common);

	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
		list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				 list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
		return;
	}

	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
		struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
		struct mlxsw_sp_nexthop *nh;

		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
		if (nh && nh->offloaded)
			mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
		else
			mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
	}
}
3955
3956 static void
3957 mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3958 {
3959         struct mlxsw_sp_fib6_entry *fib6_entry;
3960         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3961
3962         fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
3963                                   common);
3964         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3965                 struct fib6_info *rt = mlxsw_sp_rt6->rt;
3966
3967                 rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
3968         }
3969 }
3970
3971 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
3972 {
3973         switch (fib_entry->fib_node->fib->proto) {
3974         case MLXSW_SP_L3_PROTO_IPV4:
3975                 mlxsw_sp_fib4_entry_offload_set(fib_entry);
3976                 break;
3977         case MLXSW_SP_L3_PROTO_IPV6:
3978                 mlxsw_sp_fib6_entry_offload_set(fib_entry);
3979                 break;
3980         }
3981 }
3982
3983 static void
3984 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
3985 {
3986         switch (fib_entry->fib_node->fib->proto) {
3987         case MLXSW_SP_L3_PROTO_IPV4:
3988                 mlxsw_sp_fib4_entry_offload_unset(fib_entry);
3989                 break;
3990         case MLXSW_SP_L3_PROTO_IPV6:
3991                 mlxsw_sp_fib6_entry_offload_unset(fib_entry);
3992                 break;
3993         }
3994 }
3995
/* Update the kernel-visible offload indication after a hardware
 * operation: clear it on delete; on a successful write set or clear it
 * according to whether the entry is actually offloaded.
 */
static void
mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
				   enum mlxsw_reg_ralue_op op, int err)
{
	switch (op) {
	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
		return mlxsw_sp_fib_entry_offload_unset(fib_entry);
	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
		/* A failed write leaves the previous indication intact. */
		if (err)
			return;
		if (mlxsw_sp_fib_entry_should_offload(fib_entry))
			mlxsw_sp_fib_entry_offload_set(fib_entry);
		else
			mlxsw_sp_fib_entry_offload_unset(fib_entry);
		return;
	default:
		return;
	}
}
4015
/* Pack the common part of a RALUE (router LPM unicast entry) register:
 * protocol, operation, virtual router and destination prefix.
 */
static void
mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
			      const struct mlxsw_sp_fib_entry *fib_entry,
			      enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
	enum mlxsw_reg_ralxx_protocol proto;
	u32 *p_dip;

	/* The register protocol enum mirrors the driver's L3 proto enum. */
	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;

	switch (fib->proto) {
	case MLXSW_SP_L3_PROTO_IPV4:
		p_dip = (u32 *) fib_entry->fib_node->key.addr;
		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      *p_dip);
		break;
	case MLXSW_SP_L3_PROTO_IPV6:
		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
				      fib_entry->fib_node->key.prefix_len,
				      fib_entry->fib_node->key.addr);
		break;
	}
}
4041
/* Program @fib_entry with a "remote" action: forward via the nexthop
 * group's adjacency entries when offloadable, otherwise trap matching
 * packets to the CPU. Returns the result of the register write.
 */
static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry,
					enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	enum mlxsw_reg_ralue_trap_action trap_action;
	u16 trap_id = 0;
	u32 adjacency_index = 0;
	u16 ecmp_size = 0;

	/* In case the nexthop group adjacency index is valid, use it
	 * with provided ECMP size. Otherwise, setup trap and pass
	 * traffic to kernel.
	 */
	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		adjacency_index = fib_entry->nh_group->adj_index;
		ecmp_size = fib_entry->nh_group->ecmp_size;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	/* Pack the key first; the action pack writes into the same payload. */
	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
					adjacency_index, ecmp_size);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4070
/* Program @fib_entry with a "local" action: forward through the
 * nexthop RIF when offloadable, otherwise trap matching packets to
 * the CPU. Returns the result of the register write.
 */
static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry,
				       enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
	enum mlxsw_reg_ralue_trap_action trap_action;
	char ralue_pl[MLXSW_REG_RALUE_LEN];
	u16 trap_id = 0;
	u16 rif_index = 0;

	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
		rif_index = rif->rif_index;
	} else {
		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
	}

	/* Pack the key first; the action pack writes into the same payload. */
	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
				       rif_index);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4094
/* Program @fib_entry with an "ip2me" action: matching packets are
 * delivered to the local CPU.
 */
static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_entry *fib_entry,
				      enum mlxsw_reg_ralue_op op)
{
	char ralue_pl[MLXSW_REG_RALUE_LEN];

	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
}
4105
/* Program @fib_entry as an IP-in-IP decapsulation entry through the
 * tunnel-type specific fib_entry_op() callback.
 */
static int
mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
				 struct mlxsw_sp_fib_entry *fib_entry,
				 enum mlxsw_reg_ralue_op op)
{
	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
	const struct mlxsw_sp_ipip_ops *ipip_ops;

	/* A decap entry must have been bound to an IPIP entry earlier. */
	if (WARN_ON(!ipip_entry))
		return -EINVAL;

	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
				      fib_entry->decap.tunnel_index);
}
4121
4122 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4123                                    struct mlxsw_sp_fib_entry *fib_entry,
4124                                    enum mlxsw_reg_ralue_op op)
4125 {
4126         switch (fib_entry->type) {
4127         case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4128                 return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4129         case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4130                 return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4131         case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4132                 return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4133         case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4134                 return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4135                                                         fib_entry, op);
4136         }
4137         return -EINVAL;
4138 }
4139
4140 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4141                                  struct mlxsw_sp_fib_entry *fib_entry,
4142                                  enum mlxsw_reg_ralue_op op)
4143 {
4144         int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4145
4146         mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4147
4148         return err;
4149 }
4150
4151 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4152                                      struct mlxsw_sp_fib_entry *fib_entry)
4153 {
4154         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4155                                      MLXSW_REG_RALUE_OP_WRITE_WRITE);
4156 }
4157
4158 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4159                                   struct mlxsw_sp_fib_entry *fib_entry)
4160 {
4161         return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4162                                      MLXSW_REG_RALUE_OP_WRITE_DELETE);
4163 }
4164
/* Derive the entry type (how the route will be programmed to the
 * device) from the kernel route type in @fen_info. Returns 0 on
 * success or -EINVAL for unsupported route types.
 */
static int
mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
			     const struct fib_entry_notifier_info *fen_info,
			     struct mlxsw_sp_fib_entry *fib_entry)
{
	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
	struct net_device *dev = fen_info->fi->fib_dev;
	struct mlxsw_sp_ipip_entry *ipip_entry;
	struct fib_info *fi = fen_info->fi;

	switch (fen_info->type) {
	case RTN_LOCAL:
		/* A local route matching an IPIP tunnel's decap address
		 * becomes a decap entry rather than a plain trap,
		 * provided the overlay device is up.
		 */
		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
						 MLXSW_SP_L3_PROTO_IPV4, dip);
		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
							     fib_entry,
							     ipip_entry);
		}
		/* fall through */
	case RTN_BROADCAST:
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
		return 0;
	case RTN_UNREACHABLE: /* fall through */
	case RTN_BLACKHOLE: /* fall through */
	case RTN_PROHIBIT:
		/* Packets hitting these routes need to be trapped, but
		 * can do so with a lower priority than packets directed
		 * at the host, so use action type local instead of trap.
		 */
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	case RTN_UNICAST:
		/* Gateway routes are resolved through a nexthop group
		 * ("remote"); directly-connected ones use the egress
		 * RIF ("local").
		 */
		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
		else
			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
		return 0;
	default:
		return -EINVAL;
	}
}
4208
/* Allocate and initialize a FIB4 entry for the route described by
 * @fen_info: resolve its type, take a nexthop group reference and
 * record the route attributes used for lookup/ordering. The entry is
 * not linked into @fib_node's list here. Returns the entry or an
 * ERR_PTR() on failure.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	int err;

	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
	if (!fib4_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib4_entry->common;

	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
	if (err)
		goto err_fib4_entry_type_set;

	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
	if (err)
		goto err_nexthop4_group_get;

	/* Attributes matched on in mlxsw_sp_fib4_entry_lookup() and used
	 * to keep the node's entry list sorted.
	 */
	fib4_entry->prio = fen_info->fi->fib_priority;
	fib4_entry->tb_id = fen_info->tb_id;
	fib4_entry->type = fen_info->type;
	fib4_entry->tos = fen_info->tos;

	fib_entry->fib_node = fib_node;

	return fib4_entry;

err_nexthop4_group_get:
err_fib4_entry_type_set:
	kfree(fib4_entry);
	return ERR_PTR(err);
}
4245
/* Free a FIB4 entry created by mlxsw_sp_fib4_entry_create(), dropping
 * its nexthop group reference first.
 */
static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
	kfree(fib4_entry);
}
4252
/* Find the FIB4 entry matching the route in @fen_info, or NULL when
 * the virtual router, FIB node or entry does not exist.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
					    sizeof(fen_info->dst),
					    fen_info->dst_len);
	if (!fib_node)
		return NULL;

	/* Several entries may share a prefix; match on the attributes
	 * recorded at creation time plus the backing fib_info.
	 */
	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		if (fib4_entry->tb_id == fen_info->tb_id &&
		    fib4_entry->tos == fen_info->tos &&
		    fib4_entry->type == fen_info->type &&
		    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
		    fen_info->fi) {
			return fib4_entry;
		}
	}

	return NULL;
}
4285
/* rhashtable parameters for looking up FIB nodes within a FIB
 * instance, keyed by struct mlxsw_sp_fib_key (address + prefix length).
 */
static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
	.key_len = sizeof(struct mlxsw_sp_fib_key),
	.automatic_shrinking = true,
};
4292
4293 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4294                                     struct mlxsw_sp_fib_node *fib_node)
4295 {
4296         return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4297                                       mlxsw_sp_fib_ht_params);
4298 }
4299
4300 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4301                                      struct mlxsw_sp_fib_node *fib_node)
4302 {
4303         rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4304                                mlxsw_sp_fib_ht_params);
4305 }
4306
/* Look up a FIB node by { address, prefix length }. The key is zeroed
 * with memset() first so that address bytes beyond @addr_len (and any
 * struct padding) compare equal in the rhashtable lookup.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_key key;

	memset(&key, 0, sizeof(key));
	memcpy(key.addr, addr, addr_len);
	key.prefix_len = prefix_len;
	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
}
4318
/* Allocate a FIB node for the given prefix and add it to the FIB's
 * node list. Hash table insertion and LPM tree linkage happen later,
 * in mlxsw_sp_fib_node_init(). Returns NULL on allocation failure.
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
			 size_t addr_len, unsigned char prefix_len)
{
	struct mlxsw_sp_fib_node *fib_node;

	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
	if (!fib_node)
		return NULL;

	INIT_LIST_HEAD(&fib_node->entry_list);
	list_add(&fib_node->list, &fib->node_list);
	/* kzalloc() zeroed the key, so bytes beyond addr_len stay zero
	 * and match mlxsw_sp_fib_node_lookup()'s zeroed key.
	 */
	memcpy(fib_node->key.addr, addr, addr_len);
	fib_node->key.prefix_len = prefix_len;

	return fib_node;
}
4336
/* Unlink @fib_node from its FIB's node list and free it. The node is
 * expected to have no remaining entries.
 */
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
	list_del(&fib_node->list);
	WARN_ON(!list_empty(&fib_node->entry_list));
	kfree(fib_node);
}
4343
/* Return true iff @fib_entry is the first entry of @fib_node's list,
 * i.e. the one reflected to the device for this prefix.
 */
static bool
mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
				 const struct mlxsw_sp_fib_entry *fib_entry)
{
	return list_first_entry(&fib_node->entry_list,
				struct mlxsw_sp_fib_entry, list) == fib_entry;
}
4351
/* Account @fib_node's prefix length in the LPM tree used for its
 * protocol. If the prefix length is not yet covered, get a tree that
 * also covers it and switch this FIB's virtual routers over to it.
 */
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
				      struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	struct mlxsw_sp_lpm_tree *lpm_tree;
	int err;

	/* Fast path: the prefix length is already in use, just bump
	 * its reference count.
	 */
	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		goto out;

	/* Request a tree covering the current usage plus the new
	 * prefix length.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return PTR_ERR(lpm_tree);

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

out:
	/* lpm_tree points at either the pre-existing tree or the newly
	 * acquired one at this point.
	 */
	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
	return 0;

err_lpm_tree_replace:
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
	return err;
}
4383
/* Drop @fib_node's prefix length reference from its FIB's LPM tree.
 * When the last user of the prefix length goes away, opportunistically
 * shrink the tree to the remaining usage.
 */
static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
	struct mlxsw_sp_prefix_usage req_prefix_usage;
	struct mlxsw_sp_fib *fib = fib_node->fib;
	int err;

	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
		return;
	/* Try to construct a new LPM tree from the current prefix usage
	 * minus the unused one. If we fail, continue using the old one.
	 */
	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
				    fib_node->key.prefix_len);
	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
					 fib->proto);
	if (IS_ERR(lpm_tree))
		return;

	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
	if (err)
		goto err_lpm_tree_replace;

	return;

err_lpm_tree_replace:
	/* Couldn't switch over; drop the reference on the smaller tree
	 * and keep using the old one.
	 */
	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
4414
/* Bind @fib_node to @fib: insert it into the hash table and link its
 * prefix length into the LPM tree. Unwinds the insertion on failure.
 */
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node,
				  struct mlxsw_sp_fib *fib)
{
	int err;

	err = mlxsw_sp_fib_node_insert(fib, fib_node);
	if (err)
		return err;
	fib_node->fib = fib;

	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
	if (err)
		goto err_fib_lpm_tree_link;

	return 0;

err_fib_lpm_tree_link:
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
	return err;
}
4437
/* Reverse of mlxsw_sp_fib_node_init(): unlink the node's prefix length
 * from the LPM tree and remove the node from the hash table.
 */
static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib *fib = fib_node->fib;

	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
	fib_node->fib = NULL;
	mlxsw_sp_fib_node_remove(fib, fib_node);
}
4447
/* Look up the FIB node for the given prefix in the table's virtual
 * router, creating node (and VR reference) as needed. Balanced by
 * mlxsw_sp_fib_node_put(). Returns the node or an ERR_PTR().
 */
static struct mlxsw_sp_fib_node *
mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
		      size_t addr_len, unsigned char prefix_len,
		      enum mlxsw_sp_l3proto proto)
{
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;
	int err;

	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	fib = mlxsw_sp_vr_fib(vr, proto);

	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
	if (fib_node)
		return fib_node;

	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
	if (!fib_node) {
		err = -ENOMEM;
		goto err_fib_node_create;
	}

	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
	if (err)
		goto err_fib_node_init;

	return fib_node;

err_fib_node_init:
	mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
4485
/* Release @fib_node if it no longer holds any entries: tear down its
 * FIB bindings, free it and drop the virtual router reference taken in
 * mlxsw_sp_fib_node_get(). A no-op while entries remain.
 */
static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_vr *vr = fib_node->fib->vr;

	if (!list_empty(&fib_node->entry_list))
		return;
	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
	mlxsw_sp_fib_node_destroy(fib_node);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
4497
/* Find the entry before which @new4_entry should be inserted. The
 * node's list is kept sorted by table ID (descending), then TOS
 * (descending), then priority (ascending). Returns NULL when the new
 * entry belongs at the end of its span.
 */
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;

	list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
		/* Skip entries from higher-numbered tables; stop once a
		 * lower-numbered table starts.
		 */
		if (fib4_entry->tb_id > new4_entry->tb_id)
			continue;
		if (fib4_entry->tb_id != new4_entry->tb_id)
			break;
		if (fib4_entry->tos > new4_entry->tos)
			continue;
		if (fib4_entry->prio >= new4_entry->prio ||
		    fib4_entry->tos < new4_entry->tos)
			return fib4_entry;
	}

	return NULL;
}
4518
/* Append @new4_entry after the last existing entry sharing its table
 * ID, TOS and priority, scanning forward from @fib4_entry (the first
 * such sibling).
 */
static int
mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
			       struct mlxsw_sp_fib4_entry *new4_entry)
{
	struct mlxsw_sp_fib_node *fib_node;

	/* An append must have an existing sibling to append to. */
	if (WARN_ON(!fib4_entry))
		return -EINVAL;

	fib_node = fib4_entry->common.fib_node;
	/* Advance past all entries with identical keys; if none differ,
	 * the cursor ends up at the list head and list_add_tail()
	 * appends at the very end.
	 */
	list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
				 common.list) {
		if (fib4_entry->tb_id != new4_entry->tb_id ||
		    fib4_entry->tos != new4_entry->tos ||
		    fib4_entry->prio != new4_entry->prio)
			break;
	}

	list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
	return 0;
}
4540
/* Insert @new4_entry into its node's sorted entry list. With @append,
 * place it after its identical-key siblings; with @replace, place it
 * immediately before the entry being replaced; otherwise insert at the
 * position dictated by the sort order.
 */
static int
mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
			       bool replace, bool append)
{
	struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *fib4_entry;

	fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);

	if (append)
		return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
	if (replace && WARN_ON(!fib4_entry))
		return -EINVAL;

	/* Insert new entry before replaced one, so that we can later
	 * remove the second.
	 */
	if (fib4_entry) {
		list_add_tail(&new4_entry->common.list,
			      &fib4_entry->common.list);
	} else {
		struct mlxsw_sp_fib4_entry *last;

		/* No insertion point was found; append after the last
		 * entry whose table ID is not smaller (list is sorted
		 * by descending table ID).
		 */
		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			if (new4_entry->tb_id > last->tb_id)
				break;
			fib4_entry = last;
		}

		if (fib4_entry)
			list_add(&new4_entry->common.list,
				 &fib4_entry->common.list);
		else
			list_add(&new4_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
4580
/* Unlink @fib4_entry from its FIB node's entry list. */
static void
mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
{
	list_del(&fib4_entry->common.list);
}
4586
/* Reflect @fib_entry to the device, but only if it is the first entry
 * of its node — only one entry per prefix is programmed.
 */
static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
				       struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return 0;

	/* To prevent packet loss, overwrite the previously offloaded
	 * entry.
	 */
	if (!list_is_singular(&fib_node->entry_list)) {
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);

		/* Only refresh the demoted entry's offload indication;
		 * the hardware entry itself is overwritten below.
		 */
		mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
	}

	return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
}
4607
/* Remove @fib_entry from the device. If other entries share the
 * prefix, promote the next one by overwriting instead of deleting, to
 * prevent packet loss.
 */
static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib_entry *fib_entry)
{
	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;

	/* Non-first entries were never programmed to the device. */
	if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
		return;

	/* Promote the next entry by overwriting the deleted entry */
	if (!list_is_singular(&fib_node->entry_list)) {
		struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
		enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;

		mlxsw_sp_fib_entry_update(mlxsw_sp, n);
		mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
		return;
	}

	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
}
4628
/* Link @fib4_entry into its node's entry list and reflect it to the
 * device if it became the first entry. Unwinds the list insertion on
 * failure.
 */
static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib4_entry *fib4_entry,
					 bool replace, bool append)
{
	int err;

	err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
	if (err)
		return err;

	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_fib4_node_list_remove(fib4_entry);
	return err;
}
4649
/* Reverse of mlxsw_sp_fib4_node_entry_link(): remove the entry from
 * the device and the node's list, and release the decap binding of
 * IPIP decap entries.
 */
static void
mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib4_entry *fib4_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
	mlxsw_sp_fib4_node_list_remove(fib4_entry);

	if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
}
4660
/* For a 'replace' operation, dispose of the replaced entry, which
 * mlxsw_sp_fib4_node_list_insert() kept immediately after the new one.
 * A no-op when @replace is false.
 */
static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib4_entry *fib4_entry,
					bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
	struct mlxsw_sp_fib4_entry *replaced;

	if (!replace)
		return;

	/* We inserted the new entry before replaced one */
	replaced = list_next_entry(fib4_entry, common.list);

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4678
/* Handle an IPv4 route add/replace/append notification: get the FIB
 * node for the prefix, create the entry, link it to the node and, for
 * a 'replace', dispose of the replaced entry. Returns 0 when the
 * router is in aborted mode.
 */
static int
mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
			 const struct fib_entry_notifier_info *fen_info,
			 bool replace, bool append)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	if (mlxsw_sp->router->aborted)
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
					 &fen_info->dst, sizeof(fen_info->dst),
					 fen_info->dst_len,
					 MLXSW_SP_L3_PROTO_IPV4);
	if (IS_ERR(fib_node)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
		return PTR_ERR(fib_node);
	}

	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
	if (IS_ERR(fib4_entry)) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
		err = PTR_ERR(fib4_entry);
		goto err_fib4_entry_create;
	}

	err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
					    append);
	if (err) {
		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
		goto err_fib4_node_entry_link;
	}

	mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);

	return 0;

err_fib4_node_entry_link:
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
4724
/* Handle an IPv4 route delete notification: unlink and destroy the
 * matching entry and drop the FIB node reference. A no-op when the
 * router is in aborted mode.
 */
static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib_entry_notifier_info *fen_info)
{
	struct mlxsw_sp_fib4_entry *fib4_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
	if (WARN_ON(!fib4_entry))
		return;
	fib_node = fib4_entry->common.fib_node;

	mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
4743
4744 static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4745 {
4746         /* Packets with link-local destination IP arriving to the router
4747          * are trapped to the CPU, so no need to program specific routes
4748          * for them.
4749          */
4750         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4751                 return true;
4752
4753         /* Multicast routes aren't supported, so ignore them. Neighbour
4754          * Discovery packets are specifically trapped.
4755          */
4756         if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4757                 return true;
4758
4759         /* Cloned routes are irrelevant in the forwarding path. */
4760         if (rt->fib6_flags & RTF_CACHE)
4761                 return true;
4762
4763         return false;
4764 }
4765
/* Allocate a wrapper around kernel route @rt and take a reference on
 * it. Balanced by mlxsw_sp_rt6_destroy().
 */
static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
	if (!mlxsw_sp_rt6)
		return ERR_PTR(-ENOMEM);

	/* In case of route replace, replaced route is deleted with
	 * no notification. Take reference to prevent accessing freed
	 * memory.
	 */
	mlxsw_sp_rt6->rt = rt;
	fib6_info_hold(rt);

	return mlxsw_sp_rt6;
}
4783
#if IS_ENABLED(CONFIG_IPV6)
/* Drop the reference taken in mlxsw_sp_rt6_create(). */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
	fib6_info_release(rt);
}
#else
/* Without IPv6 support there is no reference to drop. */
static void mlxsw_sp_rt6_release(struct fib6_info *rt)
{
}
#endif
4794
/* Release the route reference taken at creation and free the wrapper. */
static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
{
	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
	kfree(mlxsw_sp_rt6);
}
4800
/* A route may join a multipath entry only if it is a gateway route
 * that was not installed by address autoconfiguration.
 */
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
	/* RTF_CACHE routes are ignored */
	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}
4806
/* Return the first (representative) kernel route backing @fib6_entry. */
static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
				list)->rt;
}
4813
/* Find an existing multipath-capable entry that @nrt can join: same
 * table, same metric, multipath-capable. Returns NULL when @nrt cannot
 * be a multipath member (or @replace is requested) or no candidate
 * exists.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
				 const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;

	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		/* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
		 * virtual router.
		 */
		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
			continue;
		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
			break;
		if (rt->fib6_metric < nrt->fib6_metric)
			continue;
		if (rt->fib6_metric == nrt->fib6_metric &&
		    mlxsw_sp_fib6_rt_can_mp(rt))
			return fib6_entry;
		/* Entries are ordered by metric; past this point no
		 * equal-metric candidate can follow.
		 */
		if (rt->fib6_metric > nrt->fib6_metric)
			break;
	}

	return NULL;
}
4844
4845 static struct mlxsw_sp_rt6 *
4846 mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
4847                             const struct fib6_info *rt)
4848 {
4849         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4850
4851         list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4852                 if (mlxsw_sp_rt6->rt == rt)
4853                         return mlxsw_sp_rt6;
4854         }
4855
4856         return NULL;
4857 }
4858
/* Test whether the route's nexthop device is an IP-in-IP tunnel known
 * to us; when it is and @ret is non-NULL, the tunnel type is stored
 * there.
 */
static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
					const struct fib6_info *rt,
					enum mlxsw_sp_ipip_type *ret)
{
	return rt->fib6_nh.nh_dev &&
	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
}
4866
4867 static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
4868                                        struct mlxsw_sp_nexthop_group *nh_grp,
4869                                        struct mlxsw_sp_nexthop *nh,
4870                                        const struct fib6_info *rt)
4871 {
4872         const struct mlxsw_sp_ipip_ops *ipip_ops;
4873         struct mlxsw_sp_ipip_entry *ipip_entry;
4874         struct net_device *dev = rt->fib6_nh.nh_dev;
4875         struct mlxsw_sp_rif *rif;
4876         int err;
4877
4878         ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
4879         if (ipip_entry) {
4880                 ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4881                 if (ipip_ops->can_offload(mlxsw_sp, dev,
4882                                           MLXSW_SP_L3_PROTO_IPV6)) {
4883                         nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
4884                         mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
4885                         return 0;
4886                 }
4887         }
4888
4889         nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
4890         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
4891         if (!rif)
4892                 return 0;
4893         mlxsw_sp_nexthop_rif_init(nh, rif);
4894
4895         err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
4896         if (err)
4897                 goto err_nexthop_neigh_init;
4898
4899         return 0;
4900
4901 err_nexthop_neigh_init:
4902         mlxsw_sp_nexthop_rif_fini(nh);
4903         return err;
4904 }
4905
/* Tear down whatever mlxsw_sp_nexthop6_type_init() set up; the common
 * helper dispatches on nh->type.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
4911
/* Initialize one nexthop of an IPv6 nexthop group from a kernel route:
 * record weight and gateway address, allocate a flow counter
 * (best-effort; failures are absorbed by the helper), and register the
 * nexthop on the router-wide list before resolving its type.
 */
static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
				  struct mlxsw_sp_nexthop_group *nh_grp,
				  struct mlxsw_sp_nexthop *nh,
				  const struct fib6_info *rt)
{
	struct net_device *dev = rt->fib6_nh.nh_dev;

	nh->nh_grp = nh_grp;
	nh->nh_weight = rt->fib6_nh.nh_weight;
	memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);

	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

	/* A device-less route (e.g. a reject route) yields a nexthop
	 * with no type; it can never be resolved.
	 */
	if (!dev)
		return 0;
	nh->ifindex = dev->ifindex;

	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
}
4932
/* Reverse of mlxsw_sp_nexthop6_init(), in strict reverse order. */
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
				   struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
	list_del(&nh->router_list_node);
	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
4940
/* A route needs adjacency-based (remote) forwarding if it has a
 * gateway or egresses through an offloadable IP-in-IP tunnel.
 */
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
				    const struct fib6_info *rt)
{
	return rt->fib6_flags & RTF_GATEWAY ||
	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
4947
/* Create a nexthop group with one nexthop per route in the entry,
 * insert it into the router's rhashtable (so equal groups can be
 * shared) and write it to the device. On error, already-initialized
 * nexthops are torn down in reverse order.
 */
static struct mlxsw_sp_nexthop_group *
mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *nh_grp;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	struct mlxsw_sp_nexthop *nh;
	size_t alloc_size;
	int i = 0;
	int err;

	/* The nexthop array is laid out directly after the group. */
	alloc_size = sizeof(*nh_grp) +
		     fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop);
	nh_grp = kzalloc(alloc_size, GFP_KERNEL);
	if (!nh_grp)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&nh_grp->fib_list);
#if IS_ENABLED(CONFIG_IPV6)
	nh_grp->neigh_tbl = &nd_tbl;
#endif
	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
					struct mlxsw_sp_rt6, list);
	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
	nh_grp->count = fib6_entry->nrt6;
	/* Nexthops are initialized in the same order as the rt6 list,
	 * so nexthop i always corresponds to the i-th route.
	 */
	for (i = 0; i < nh_grp->count; i++) {
		struct fib6_info *rt = mlxsw_sp_rt6->rt;

		nh = &nh_grp->nexthops[i];
		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
		if (err)
			goto err_nexthop6_init;
		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
	}

	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
	if (err)
		goto err_nexthop_group_insert;

	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
	return nh_grp;

err_nexthop_group_insert:
err_nexthop6_init:
	for (i--; i >= 0; i--) {
		nh = &nh_grp->nexthops[i];
		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
	}
	kfree(nh_grp);
	return ERR_PTR(err);
}
4998
4999 static void
5000 mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5001                                 struct mlxsw_sp_nexthop_group *nh_grp)
5002 {
5003         struct mlxsw_sp_nexthop *nh;
5004         int i = nh_grp->count;
5005
5006         mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5007         for (i--; i >= 0; i--) {
5008                 nh = &nh_grp->nexthops[i];
5009                 mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5010         }
5011         mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5012         WARN_ON(nh_grp->adj_index_valid);
5013         kfree(nh_grp);
5014 }
5015
5016 static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5017                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5018 {
5019         struct mlxsw_sp_nexthop_group *nh_grp;
5020
5021         nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5022         if (!nh_grp) {
5023                 nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5024                 if (IS_ERR(nh_grp))
5025                         return PTR_ERR(nh_grp);
5026         }
5027
5028         list_add_tail(&fib6_entry->common.nexthop_group_node,
5029                       &nh_grp->fib_list);
5030         fib6_entry->common.nh_group = nh_grp;
5031
5032         return 0;
5033 }
5034
5035 static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5036                                         struct mlxsw_sp_fib_entry *fib_entry)
5037 {
5038         struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5039
5040         list_del(&fib_entry->nexthop_group_node);
5041         if (!list_empty(&nh_grp->fib_list))
5042                 return;
5043         mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5044 }
5045
/* Switch the FIB entry to a nexthop group matching its current route
 * list (used after a route was appended to or removed from the entry).
 * The old group is kept alive until the entry is re-written to the
 * device so rollback on failure can restore it unchanged.
 */
static int
mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
			       struct mlxsw_sp_fib6_entry *fib6_entry)
{
	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
	int err;

	fib6_entry->common.nh_group = NULL;
	list_del(&fib6_entry->common.nexthop_group_node);

	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	/* In case this entry is offloaded, then the adjacency index
	 * currently associated with it in the device's table is that
	 * of the old group. Start using the new one instead.
	 */
	err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
	if (err)
		goto err_fib_node_entry_add;

	/* Only now is it safe to drop the old group, if unused. */
	if (list_empty(&old_nh_grp->fib_list))
		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);

	return 0;

err_fib_node_entry_add:
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
	/* Re-attach the entry to its previous group. */
	list_add_tail(&fib6_entry->common.nexthop_group_node,
		      &old_nh_grp->fib_list);
	fib6_entry->common.nh_group = old_nh_grp;
	return err;
}
5081
5082 static int
5083 mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5084                                 struct mlxsw_sp_fib6_entry *fib6_entry,
5085                                 struct fib6_info *rt)
5086 {
5087         struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5088         int err;
5089
5090         mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5091         if (IS_ERR(mlxsw_sp_rt6))
5092                 return PTR_ERR(mlxsw_sp_rt6);
5093
5094         list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5095         fib6_entry->nrt6++;
5096
5097         err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5098         if (err)
5099                 goto err_nexthop6_group_update;
5100
5101         return 0;
5102
5103 err_nexthop6_group_update:
5104         fib6_entry->nrt6--;
5105         list_del(&mlxsw_sp_rt6->list);
5106         mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5107         return err;
5108 }
5109
/* Remove one kernel route from a multipath FIB entry and shrink its
 * nexthop group accordingly. The group update failing leaves the
 * entry bound to its previous group, which is the best we can do in a
 * void deletion path.
 */
static void
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry,
				struct fib6_info *rt)
{
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;

	mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
	if (WARN_ON(!mlxsw_sp_rt6))
		return;

	fib6_entry->nrt6--;
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
}
5126
/* Derive the hardware entry type from the route's flags; the order of
 * the tests matters since a route can carry several flags.
 */
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
					 struct mlxsw_sp_fib_entry *fib_entry,
					 const struct fib6_info *rt)
{
	/* Packets hitting RTF_REJECT routes need to be discarded by the
	 * stack. We can rely on their destination device not having a
	 * RIF (it's the loopback device) and can thus use action type
	 * local, which will cause them to be trapped with a lower
	 * priority than packets that need to be locally received.
	 */
	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
	else if (rt->fib6_flags & RTF_REJECT)
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
	else
		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
}
5146
5147 static void
5148 mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5149 {
5150         struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5151
5152         list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5153                                  list) {
5154                 fib6_entry->nrt6--;
5155                 list_del(&mlxsw_sp_rt6->list);
5156                 mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5157         }
5158 }
5159
/* Create a FIB entry for @rt under @fib_node: wrap the route, pick the
 * hardware entry type and acquire a matching nexthop group. The entry
 * is not yet linked into the node's entry list nor written to the
 * device; that is done by mlxsw_sp_fib6_node_entry_link().
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
			   struct mlxsw_sp_fib_node *fib_node,
			   struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_entry *fib_entry;
	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
	int err;

	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
	if (!fib6_entry)
		return ERR_PTR(-ENOMEM);
	fib_entry = &fib6_entry->common;

	mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
	if (IS_ERR(mlxsw_sp_rt6)) {
		err = PTR_ERR(mlxsw_sp_rt6);
		goto err_rt6_create;
	}

	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);

	INIT_LIST_HEAD(&fib6_entry->rt6_list);
	list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
	fib6_entry->nrt6 = 1;
	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
	if (err)
		goto err_nexthop6_group_get;

	fib_entry->fib_node = fib_node;

	return fib6_entry;

err_nexthop6_group_get:
	list_del(&mlxsw_sp_rt6->list);
	mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
err_rt6_create:
	kfree(fib6_entry);
	return ERR_PTR(err);
}
5201
/* Reverse of mlxsw_sp_fib6_entry_create(): release the nexthop group,
 * drop all route containers and free the entry.
 */
static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
	WARN_ON(fib6_entry->nrt6);
	kfree(fib6_entry);
}
5210
/* Find the entry that @nrt should be inserted before in the node's
 * list (sorted by decreasing table ID, then increasing metric). In the
 * replace case an entry with the same metric and multipath capability
 * is preferred, with an MP-capable @nrt falling back to the first
 * same-metric entry. NULL means "append at the end".
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
			      const struct fib6_info *nrt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
			continue;
		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
			break;
		if (replace && rt->fib6_metric == nrt->fib6_metric) {
			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
			    mlxsw_sp_fib6_rt_can_mp(nrt))
				return fib6_entry;
			if (mlxsw_sp_fib6_rt_can_mp(nrt))
				fallback = fallback ?: fib6_entry;
		}
		if (rt->fib6_metric > nrt->fib6_metric)
			return fallback ?: fib6_entry;
	}

	return fallback;
}
5237
/* Insert the new entry at its sorted position in the node's entry
 * list. When an insertion point is found the entry goes right before
 * it; otherwise it is placed after the last entry whose table ID is
 * not smaller, or at the list head if none qualifies.
 */
static int
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
			       bool replace)
{
	struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
	struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
	struct mlxsw_sp_fib6_entry *fib6_entry;

	fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);

	/* A replace must have something to replace. */
	if (replace && WARN_ON(!fib6_entry))
		return -EINVAL;

	if (fib6_entry) {
		list_add_tail(&new6_entry->common.list,
			      &fib6_entry->common.list);
	} else {
		struct mlxsw_sp_fib6_entry *last;

		list_for_each_entry(last, &fib_node->entry_list, common.list) {
			struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);

			if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
				break;
			fib6_entry = last;
		}

		if (fib6_entry)
			list_add(&new6_entry->common.list,
				 &fib6_entry->common.list);
		else
			list_add(&new6_entry->common.list,
				 &fib_node->entry_list);
	}

	return 0;
}
5275
/* Unlink the entry from its node's ordered entry list. */
static void
mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
{
	list_del(&fib6_entry->common.list);
}
5281
5282 static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5283                                          struct mlxsw_sp_fib6_entry *fib6_entry,
5284                                          bool replace)
5285 {
5286         int err;
5287
5288         err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5289         if (err)
5290                 return err;
5291
5292         err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5293         if (err)
5294                 goto err_fib_node_entry_add;
5295
5296         return 0;
5297
5298 err_fib_node_entry_add:
5299         mlxsw_sp_fib6_node_list_remove(fib6_entry);
5300         return err;
5301 }
5302
/* Remove the entry from the device and from its node's entry list,
 * reversing mlxsw_sp_fib6_node_entry_link().
 */
static void
mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_fib6_entry *fib6_entry)
{
	mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
	mlxsw_sp_fib6_node_list_remove(fib6_entry);
}
5310
/* Locate the FIB entry holding @rt: resolve the route's table to a
 * virtual router, find the FIB node for the destination prefix, then
 * scan the node's entries for one with a matching table ID and metric
 * that actually contains the route.
 */
static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
			   const struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	struct mlxsw_sp_fib *fib;
	struct mlxsw_sp_vr *vr;

	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
	if (!vr)
		return NULL;
	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);

	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
					    sizeof(rt->fib6_dst.addr),
					    rt->fib6_dst.plen);
	if (!fib_node)
		return NULL;

	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
		struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);

		if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
		    rt->fib6_metric == iter_rt->fib6_metric &&
		    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
			return fib6_entry;
	}

	return NULL;
}
5342
5343 static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5344                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5345                                         bool replace)
5346 {
5347         struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5348         struct mlxsw_sp_fib6_entry *replaced;
5349
5350         if (!replace)
5351                 return;
5352
5353         replaced = list_next_entry(fib6_entry, common.list);
5354
5355         mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5356         mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5357         mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5358 }
5359
/* Handle an IPv6 route add/replace notification. Routes with a source
 * prefix cannot be offloaded (-EINVAL aborts the offload), link-local
 * and other irrelevant routes are ignored. The route is appended to an
 * existing multipath entry when possible, otherwise a new entry is
 * created, linked and — for replace — the superseded entry torn down.
 */
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
				    struct fib6_info *rt, bool replace)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;
	int err;

	/* After abort, traffic is trapped to the CPU; nothing to do. */
	if (mlxsw_sp->router->aborted)
		return 0;

	if (rt->fib6_src.plen)
		return -EINVAL;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return 0;

	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
					 &rt->fib6_dst.addr,
					 sizeof(rt->fib6_dst.addr),
					 rt->fib6_dst.plen,
					 MLXSW_SP_L3_PROTO_IPV6);
	if (IS_ERR(fib_node))
		return PTR_ERR(fib_node);

	/* Before creating a new entry, try to append route to an existing
	 * multipath entry.
	 */
	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
	if (fib6_entry) {
		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
		if (err)
			goto err_fib6_entry_nexthop_add;
		return 0;
	}

	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
	if (IS_ERR(fib6_entry)) {
		err = PTR_ERR(fib6_entry);
		goto err_fib6_entry_create;
	}

	err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
	if (err)
		goto err_fib6_node_entry_link;

	mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);

	return 0;

err_fib6_node_entry_link:
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_nexthop_add:
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
	return err;
}
5416
/* Handle an IPv6 route delete notification: shrink the owning
 * multipath entry, or — when this is the entry's last route — unlink
 * and destroy the whole entry and drop the node reference.
 */
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
				     struct fib6_info *rt)
{
	struct mlxsw_sp_fib6_entry *fib6_entry;
	struct mlxsw_sp_fib_node *fib_node;

	if (mlxsw_sp->router->aborted)
		return;

	if (mlxsw_sp_fib6_rt_should_ignore(rt))
		return;

	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
	if (WARN_ON(!fib6_entry))
		return;

	/* If route is part of a multipath entry, but not the last one
	 * removed, then only reduce its nexthop group.
	 */
	if (!list_is_singular(&fib6_entry->rt6_list)) {
		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
		return;
	}

	fib_node = fib6_entry->common.fib_node;

	mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
5447
/* Program the "abort" configuration for one protocol: allocate an LPM
 * tree with a single zero-length prefix, bind every virtual router to
 * it and install a default route that traps all packets to the CPU.
 * The register sequence (RALTA, RALST, then per-VR RALTB/RALUE) must
 * be kept in this order.
 */
static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
					    enum mlxsw_reg_ralxx_protocol proto,
					    u8 tree_id)
{
	char ralta_pl[MLXSW_REG_RALTA_LEN];
	char ralst_pl[MLXSW_REG_RALST_LEN];
	int i, err;

	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
	if (err)
		return err;

	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
	if (err)
		return err;

	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
		char raltb_pl[MLXSW_REG_RALTB_LEN];
		char ralue_pl[MLXSW_REG_RALUE_LEN];

		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
				      raltb_pl);
		if (err)
			return err;

		/* Default (prefix length 0) route trapping everything. */
		mlxsw_reg_ralue_pack(ralue_pl, proto,
				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
				      ralue_pl);
		if (err)
			return err;
	}

	return 0;
}
5488
5489 static struct mlxsw_sp_mr_table *
5490 mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5491 {
5492         if (family == RTNL_FAMILY_IPMR)
5493                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5494         else
5495                 return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5496 }
5497
5498 static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5499                                      struct mfc_entry_notifier_info *men_info,
5500                                      bool replace)
5501 {
5502         struct mlxsw_sp_mr_table *mrt;
5503         struct mlxsw_sp_vr *vr;
5504
5505         if (mlxsw_sp->router->aborted)
5506                 return 0;
5507
5508         vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5509         if (IS_ERR(vr))
5510                 return PTR_ERR(vr);
5511
5512         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5513         return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5514 }
5515
/* Handle a multicast route delete notification: remove the route from
 * the MR table and release the VR reference taken when it was added.
 */
static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
				      struct mfc_entry_notifier_info *men_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5533
5534 static int
5535 mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5536                               struct vif_entry_notifier_info *ven_info)
5537 {
5538         struct mlxsw_sp_mr_table *mrt;
5539         struct mlxsw_sp_rif *rif;
5540         struct mlxsw_sp_vr *vr;
5541
5542         if (mlxsw_sp->router->aborted)
5543                 return 0;
5544
5545         vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5546         if (IS_ERR(vr))
5547                 return PTR_ERR(vr);
5548
5549         mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5550         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5551         return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5552                                    ven_info->vif_index,
5553                                    ven_info->vif_flags, rif);
5554 }
5555
/* Handle a multicast VIF delete notification: remove the VIF from the
 * MR table and release the VR reference taken when it was added.
 */
static void
mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
			      struct vif_entry_notifier_info *ven_info)
{
	struct mlxsw_sp_mr_table *mrt;
	struct mlxsw_sp_vr *vr;

	if (mlxsw_sp->router->aborted)
		return;

	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
	if (WARN_ON(!vr))
		return;

	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
5574
5575 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5576 {
5577         enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5578         int err;
5579
5580         err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5581                                                MLXSW_SP_LPM_TREE_MIN);
5582         if (err)
5583                 return err;
5584
5585         /* The multicast router code does not need an abort trap as by default,
5586          * packets that don't match any routes are trapped to the CPU.
5587          */
5588
5589         proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5590         return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5591                                                 MLXSW_SP_LPM_TREE_MIN + 1);
5592 }
5593
/* Destroy every IPv4 entry of a FIB node. Each destruction drops a
 * node reference, so the node itself may be freed by the last
 * iteration — hence the do_break dance instead of plain safe
 * iteration.
 */
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;

	list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* Break when entry list is empty and node was freed.
		 * Otherwise, we'll access freed memory in the next
		 * iteration.
		 */
		if (do_break)
			break;
	}
}
5614
/* IPv6 counterpart of mlxsw_sp_fib4_node_flush(): destroy every entry
 * of the node, breaking out before the freed node is touched again.
 */
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
				     struct mlxsw_sp_fib_node *fib_node)
{
	struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;

	list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
				 common.list) {
		bool do_break = &tmp->common.list == &fib_node->entry_list;

		mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
		/* The last put may have freed the node; stop here. */
		if (do_break)
			break;
	}
}
5631
5632 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5633                                     struct mlxsw_sp_fib_node *fib_node)
5634 {
5635         switch (fib_node->fib->proto) {
5636         case MLXSW_SP_L3_PROTO_IPV4:
5637                 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5638                 break;
5639         case MLXSW_SP_L3_PROTO_IPV6:
5640                 mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5641                 break;
5642         }
5643 }
5644
5645 static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5646                                   struct mlxsw_sp_vr *vr,
5647                                   enum mlxsw_sp_l3proto proto)
5648 {
5649         struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5650         struct mlxsw_sp_fib_node *fib_node, *tmp;
5651
5652         list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5653                 bool do_break = &tmp->list == &fib->node_list;
5654
5655                 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5656                 if (do_break)
5657                         break;
5658         }
5659 }
5660
5661 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5662 {
5663         int i, j;
5664
5665         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5666                 struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5667
5668                 if (!mlxsw_sp_vr_is_used(vr))
5669                         continue;
5670
5671                 for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5672                         mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5673                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5674
5675                 /* If virtual router was only used for IPv4, then it's no
5676                  * longer used.
5677                  */
5678                 if (!mlxsw_sp_vr_is_used(vr))
5679                         continue;
5680                 mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5681         }
5682 }
5683
5684 static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5685 {
5686         int err;
5687
5688         if (mlxsw_sp->router->aborted)
5689                 return;
5690         dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5691         mlxsw_sp_router_fib_flush(mlxsw_sp);
5692         mlxsw_sp->router->aborted = true;
5693         err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5694         if (err)
5695                 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5696 }
5697
/* Deferred-work context for a FIB notifier event. The notifier runs in
 * atomic context, so the event payload is copied here (with references
 * held) and processed later from process context.
 */
struct mlxsw_sp_fib_event_work {
	struct work_struct work;
	/* Which union member is valid depends on the notifier family and
	 * event; see the mlxsw_sp_router_fib*_event() copy helpers.
	 */
	union {
		struct fib6_entry_notifier_info fen6_info;
		struct fib_entry_notifier_info fen_info;
		struct fib_rule_notifier_info fr_info;
		struct fib_nh_notifier_info fnh_info;
		struct mfc_entry_notifier_info men_info;
		struct vif_entry_notifier_info ven_info;
	};
	struct mlxsw_sp *mlxsw_sp;
	/* FIB_EVENT_* value that triggered this work item. */
	unsigned long event;
};
5711
/* Process-context handler for IPv4 FIB events. Runs under RTNL and
 * releases the references taken by mlxsw_sp_router_fib4_event(). Any
 * offload failure triggers the FIB abort.
 */
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace, append;
	int err;

	/* Protect internal structures from changes */
	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
					       replace, append);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the reference held since the notifier ran. */
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	case FIB_EVENT_NH_ADD: /* fall through */
	case FIB_EVENT_NH_DEL:
		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
					fib_work->fnh_info.fib_nh);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5756
/* Process-context handler for IPv6 FIB events. Runs under RTNL and
 * releases the route reference taken by mlxsw_sp_router_fib6_event().
 * Any offload failure triggers the FIB abort.
 */
static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	mlxsw_sp_span_respin(mlxsw_sp);

	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_APPEND: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
					       fib_work->fen6_info.rt, replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the reference held since the notifier ran. */
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5793
/* Process-context handler for multicast routing (MFC/VIF) events. Runs
 * under RTNL and releases the references taken by
 * mlxsw_sp_router_fibmr_event(). Any failure triggers the FIB abort.
 */
static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_fib_event_work *fib_work =
		container_of(work, struct mlxsw_sp_fib_event_work, work);
	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
	bool replace;
	int err;

	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE: /* fall through */
	case FIB_EVENT_ENTRY_ADD:
		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;

		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
						replace);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the MFC cache reference held since the notifier. */
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_ENTRY_DEL:
		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
		mr_cache_put(fib_work->men_info.mfc);
		break;
	case FIB_EVENT_VIF_ADD:
		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
						    &fib_work->ven_info);
		if (err)
			mlxsw_sp_router_fib_abort(mlxsw_sp);
		/* Drop the netdev reference held since the notifier. */
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_VIF_DEL:
		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
					      &fib_work->ven_info);
		dev_put(fib_work->ven_info.dev);
		break;
	case FIB_EVENT_RULE_ADD:
		/* if we get here, a rule was added that we do not support.
		 * just do the fib_abort
		 */
		mlxsw_sp_router_fib_abort(mlxsw_sp);
		break;
	}
	rtnl_unlock();
	kfree(fib_work);
}
5840
5841 static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
5842                                        struct fib_notifier_info *info)
5843 {
5844         struct fib_entry_notifier_info *fen_info;
5845         struct fib_nh_notifier_info *fnh_info;
5846
5847         switch (fib_work->event) {
5848         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5849         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5850         case FIB_EVENT_ENTRY_ADD: /* fall through */
5851         case FIB_EVENT_ENTRY_DEL:
5852                 fen_info = container_of(info, struct fib_entry_notifier_info,
5853                                         info);
5854                 fib_work->fen_info = *fen_info;
5855                 /* Take reference on fib_info to prevent it from being
5856                  * freed while work is queued. Release it afterwards.
5857                  */
5858                 fib_info_hold(fib_work->fen_info.fi);
5859                 break;
5860         case FIB_EVENT_NH_ADD: /* fall through */
5861         case FIB_EVENT_NH_DEL:
5862                 fnh_info = container_of(info, struct fib_nh_notifier_info,
5863                                         info);
5864                 fib_work->fnh_info = *fnh_info;
5865                 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
5866                 break;
5867         }
5868 }
5869
5870 static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
5871                                        struct fib_notifier_info *info)
5872 {
5873         struct fib6_entry_notifier_info *fen6_info;
5874
5875         switch (fib_work->event) {
5876         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5877         case FIB_EVENT_ENTRY_APPEND: /* fall through */
5878         case FIB_EVENT_ENTRY_ADD: /* fall through */
5879         case FIB_EVENT_ENTRY_DEL:
5880                 fen6_info = container_of(info, struct fib6_entry_notifier_info,
5881                                          info);
5882                 fib_work->fen6_info = *fen6_info;
5883                 fib6_info_hold(fib_work->fen6_info.rt);
5884                 break;
5885         }
5886 }
5887
5888 static void
5889 mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
5890                             struct fib_notifier_info *info)
5891 {
5892         switch (fib_work->event) {
5893         case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5894         case FIB_EVENT_ENTRY_ADD: /* fall through */
5895         case FIB_EVENT_ENTRY_DEL:
5896                 memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
5897                 mr_cache_hold(fib_work->men_info.mfc);
5898                 break;
5899         case FIB_EVENT_VIF_ADD: /* fall through */
5900         case FIB_EVENT_VIF_DEL:
5901                 memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
5902                 dev_hold(fib_work->ven_info.dev);
5903                 break;
5904         }
5905 }
5906
5907 static int mlxsw_sp_router_fib_rule_event(unsigned long event,
5908                                           struct fib_notifier_info *info,
5909                                           struct mlxsw_sp *mlxsw_sp)
5910 {
5911         struct netlink_ext_ack *extack = info->extack;
5912         struct fib_rule_notifier_info *fr_info;
5913         struct fib_rule *rule;
5914         int err = 0;
5915
5916         /* nothing to do at the moment */
5917         if (event == FIB_EVENT_RULE_DEL)
5918                 return 0;
5919
5920         if (mlxsw_sp->router->aborted)
5921                 return 0;
5922
5923         fr_info = container_of(info, struct fib_rule_notifier_info, info);
5924         rule = fr_info->rule;
5925
5926         switch (info->family) {
5927         case AF_INET:
5928                 if (!fib4_rule_default(rule) && !rule->l3mdev)
5929                         err = -EOPNOTSUPP;
5930                 break;
5931         case AF_INET6:
5932                 if (!fib6_rule_default(rule) && !rule->l3mdev)
5933                         err = -EOPNOTSUPP;
5934                 break;
5935         case RTNL_FAMILY_IPMR:
5936                 if (!ipmr_rule_default(rule) && !rule->l3mdev)
5937                         err = -EOPNOTSUPP;
5938                 break;
5939         case RTNL_FAMILY_IP6MR:
5940                 if (!ip6mr_rule_default(rule) && !rule->l3mdev)
5941                         err = -EOPNOTSUPP;
5942                 break;
5943         }
5944
5945         if (err < 0)
5946                 NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
5947
5948         return err;
5949 }
5950
/* Called with rcu_read_lock() */
static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
				     unsigned long event, void *ptr)
{
	struct mlxsw_sp_fib_event_work *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlxsw_sp_router *router;
	int err;

	/* Only the init namespace and the handled families are offloaded. */
	if (!net_eq(info->net, &init_net) ||
	    (info->family != AF_INET && info->family != AF_INET6 &&
	     info->family != RTNL_FAMILY_IPMR &&
	     info->family != RTNL_FAMILY_IP6MR))
		return NOTIFY_DONE;

	router = container_of(nb, struct mlxsw_sp_router, fib_nb);

	switch (event) {
	case FIB_EVENT_RULE_ADD: /* fall through */
	case FIB_EVENT_RULE_DEL:
		err = mlxsw_sp_router_fib_rule_event(event, info,
						     router->mlxsw_sp);
		/* On success, or when the error can be reported via
		 * extack, answer here. Otherwise fall through and let
		 * the queued work trigger the FIB abort.
		 */
		if (!err || info->extack)
			return notifier_from_errno(err);
		break;
	case FIB_EVENT_ENTRY_ADD:
		if (router->aborted) {
			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
			return notifier_from_errno(-EINVAL);
		}
		break;
	}

	/* Atomic context (see header comment); defer the actual
	 * processing to a work item.
	 */
	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (!fib_work)
		return NOTIFY_BAD;

	fib_work->mlxsw_sp = router->mlxsw_sp;
	fib_work->event = event;

	switch (info->family) {
	case AF_INET:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
		mlxsw_sp_router_fib4_event(fib_work, info);
		break;
	case AF_INET6:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
		mlxsw_sp_router_fib6_event(fib_work, info);
		break;
	case RTNL_FAMILY_IP6MR:
	case RTNL_FAMILY_IPMR:
		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
		mlxsw_sp_router_fibmr_event(fib_work, info);
		break;
	}

	mlxsw_core_schedule_work(&fib_work->work);

	return NOTIFY_DONE;
}
6011
6012 struct mlxsw_sp_rif *
6013 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6014                          const struct net_device *dev)
6015 {
6016         int i;
6017
6018         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6019                 if (mlxsw_sp->router->rifs[i] &&
6020                     mlxsw_sp->router->rifs[i]->dev == dev)
6021                         return mlxsw_sp->router->rifs[i];
6022
6023         return NULL;
6024 }
6025
/* Clear the enable bit of RIF @rif in hardware via the RITR register,
 * leaving the rest of its configuration intact.
 */
static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	/* Read-modify-write: query current RITR state first so the
	 * write-back only flips the enable bit.
	 */
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (WARN_ON_ONCE(err))
		return err;

	mlxsw_reg_ritr_enable_set(ritr_pl, false);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6039
/* Tear down state referencing @rif before it goes away: disable it in
 * hardware, then flush the nexthops and neighbours that point at it.
 */
static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
					  struct mlxsw_sp_rif *rif)
{
	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
}
6047
6048 static bool
6049 mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6050                            unsigned long event)
6051 {
6052         struct inet6_dev *inet6_dev;
6053         bool addr_list_empty = true;
6054         struct in_device *idev;
6055
6056         switch (event) {
6057         case NETDEV_UP:
6058                 return rif == NULL;
6059         case NETDEV_DOWN:
6060                 idev = __in_dev_get_rtnl(dev);
6061                 if (idev && idev->ifa_list)
6062                         addr_list_empty = false;
6063
6064                 inet6_dev = __in6_dev_get(dev);
6065                 if (addr_list_empty && inet6_dev &&
6066                     !list_empty(&inet6_dev->addr_list))
6067                         addr_list_empty = false;
6068
6069                 /* macvlans do not have a RIF, but rather piggy back on the
6070                  * RIF of their lower device.
6071                  */
6072                 if (netif_is_macvlan(dev) && addr_list_empty)
6073                         return true;
6074
6075                 if (rif && addr_list_empty &&
6076                     !netif_is_l3_slave(rif->dev))
6077                         return true;
6078                 /* It is possible we already removed the RIF ourselves
6079                  * if it was assigned to a netdev that is now a bridge
6080                  * or LAG slave.
6081                  */
6082                 return false;
6083         }
6084
6085         return false;
6086 }
6087
6088 static enum mlxsw_sp_rif_type
6089 mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6090                       const struct net_device *dev)
6091 {
6092         enum mlxsw_sp_fid_type type;
6093
6094         if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6095                 return MLXSW_SP_RIF_TYPE_IPIP_LB;
6096
6097         /* Otherwise RIF type is derived from the type of the underlying FID. */
6098         if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6099                 type = MLXSW_SP_FID_TYPE_8021Q;
6100         else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6101                 type = MLXSW_SP_FID_TYPE_8021Q;
6102         else if (netif_is_bridge_master(dev))
6103                 type = MLXSW_SP_FID_TYPE_8021D;
6104         else
6105                 type = MLXSW_SP_FID_TYPE_RFID;
6106
6107         return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6108 }
6109
6110 static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6111 {
6112         int i;
6113
6114         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6115                 if (!mlxsw_sp->router->rifs[i]) {
6116                         *p_rif_index = i;
6117                         return 0;
6118                 }
6119         }
6120
6121         return -ENOBUFS;
6122 }
6123
6124 static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6125                                                u16 vr_id,
6126                                                struct net_device *l3_dev)
6127 {
6128         struct mlxsw_sp_rif *rif;
6129
6130         rif = kzalloc(rif_size, GFP_KERNEL);
6131         if (!rif)
6132                 return NULL;
6133
6134         INIT_LIST_HEAD(&rif->nexthop_list);
6135         INIT_LIST_HEAD(&rif->neigh_list);
6136         ether_addr_copy(rif->addr, l3_dev->dev_addr);
6137         rif->mtu = l3_dev->mtu;
6138         rif->vr_id = vr_id;
6139         rif->dev = l3_dev;
6140         rif->rif_index = rif_index;
6141
6142         return rif;
6143 }
6144
/* Look up a RIF by its index; may return NULL when the slot is free. */
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
					   u16 rif_index)
{
	return mlxsw_sp->router->rifs[rif_index];
}
6150
/* Return the hardware index of @rif. */
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
{
	return rif->rif_index;
}
6155
/* Return the RIF index of an IPIP loopback RIF. */
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->common.rif_index;
}
6160
/* Return the underlay virtual router ID of an IPIP loopback RIF. */
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
{
	return lb_rif->ul_vr_id;
}
6165
/* Return the ifindex of the netdev backing @rif. */
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
{
	return rif->dev->ifindex;
}
6170
/* Return the netdev backing @rif. */
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
{
	return rif->dev;
}
6175
/* Return the FID of @rif; NULL for loopback RIFs (see rif_create). */
struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
{
	return rif->fid;
}
6180
/* Create a RIF for @params->dev, bind it to the device's virtual
 * router and program it to the hardware. On failure, resources are
 * released in reverse order via the goto ladder. Returns the new RIF
 * or an ERR_PTR.
 */
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
		    const struct mlxsw_sp_rif_params *params,
		    struct netlink_ext_ack *extack)
{
	u32 tb_id = l3mdev_fib_table(params->dev);
	const struct mlxsw_sp_rif_ops *ops;
	struct mlxsw_sp_fid *fid = NULL;
	enum mlxsw_sp_rif_type type;
	struct mlxsw_sp_rif *rif;
	struct mlxsw_sp_vr *vr;
	u16 rif_index;
	int i, err;

	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
	ops = mlxsw_sp->router->rif_ops_arr[type];

	/* Fall back to the main table when the device is not bound to an
	 * L3 master device (l3mdev_fib_table() returned 0).
	 */
	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
	if (IS_ERR(vr))
		return ERR_CAST(vr);
	vr->rif_count++;

	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
		goto err_rif_index_alloc;
	}

	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
	if (!rif) {
		err = -ENOMEM;
		goto err_rif_alloc;
	}
	rif->mlxsw_sp = mlxsw_sp;
	rif->ops = ops;

	/* Loopback RIFs have no fid_get op and thus no FID. */
	if (ops->fid_get) {
		fid = ops->fid_get(rif, extack);
		if (IS_ERR(fid)) {
			err = PTR_ERR(fid);
			goto err_fid_get;
		}
		rif->fid = fid;
	}

	if (ops->setup)
		ops->setup(rif, params);

	/* Program the RIF to the device. */
	err = ops->configure(rif);
	if (err)
		goto err_configure;

	/* Register the RIF with the multicast router, per protocol. */
	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
		if (err)
			goto err_mr_rif_add;
	}

	mlxsw_sp_rif_counters_alloc(rif);
	mlxsw_sp->router->rifs[rif_index] = rif;

	return rif;

err_mr_rif_add:
	for (i--; i >= 0; i--)
		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
	ops->deconfigure(rif);
err_configure:
	if (fid)
		mlxsw_sp_fid_put(fid);
err_fid_get:
	kfree(rif);
err_rif_alloc:
err_rif_index_alloc:
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
	return ERR_PTR(err);
}
6259
/* Destroy @rif, releasing resources in the reverse order of
 * mlxsw_sp_rif_create(). @rif must be non-NULL.
 */
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
{
	const struct mlxsw_sp_rif_ops *ops = rif->ops;
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;
	struct mlxsw_sp_vr *vr;
	int i;

	/* Flush nexthops/neighbours using the RIF before tearing it down. */
	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
	vr = &mlxsw_sp->router->vrs[rif->vr_id];

	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
	mlxsw_sp_rif_counters_free(rif);
	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
	ops->deconfigure(rif);
	if (fid)
		/* Loopback RIFs are not associated with a FID. */
		mlxsw_sp_fid_put(fid);
	kfree(rif);
	vr->rif_count--;
	mlxsw_sp_vr_put(mlxsw_sp, vr);
}
6283
/* Destroy the RIF associated with @dev, if any. No-op otherwise. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
				 struct net_device *dev)
{
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (rif)
		mlxsw_sp_rif_destroy(rif);
}
6294
/* Fill in sub-port RIF parameters from @mlxsw_sp_port_vlan. A sub-port
 * RIF is bound either to a LAG or to a single local port.
 */
static void
mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;

	params->vid = mlxsw_sp_port_vlan->vid;
	params->lag = mlxsw_sp_port->lagged;
	if (params->lag)
		params->lag_id = mlxsw_sp_port->lag_id;
	else
		params->system_port = mlxsw_sp_port->local_port;
}
6308
6309 static int
6310 mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6311                                struct net_device *l3_dev,
6312                                struct netlink_ext_ack *extack)
6313 {
6314         struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6315         struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6316         u16 vid = mlxsw_sp_port_vlan->vid;
6317         struct mlxsw_sp_rif *rif;
6318         struct mlxsw_sp_fid *fid;
6319         int err;
6320
6321         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6322         if (!rif) {
6323                 struct mlxsw_sp_rif_params params = {
6324                         .dev = l3_dev,
6325                 };
6326
6327                 mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6328                 rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6329                 if (IS_ERR(rif))
6330                         return PTR_ERR(rif);
6331         }
6332
6333         /* FID was already created, just take a reference */
6334         fid = rif->ops->fid_get(rif, extack);
6335         err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6336         if (err)
6337                 goto err_fid_port_vid_map;
6338
6339         err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6340         if (err)
6341                 goto err_port_vid_learning_set;
6342
6343         err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6344                                         BR_STATE_FORWARDING);
6345         if (err)
6346                 goto err_port_vid_stp_set;
6347
6348         mlxsw_sp_port_vlan->fid = fid;
6349
6350         return 0;
6351
6352 err_port_vid_stp_set:
6353         mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6354 err_port_vid_learning_set:
6355         mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6356 err_fid_port_vid_map:
6357         mlxsw_sp_fid_put(fid);
6358         return err;
6359 }
6360
/* Undo mlxsw_sp_port_vlan_router_join(): restore learning and STP
 * state, remove the {port, VID} mapping and drop the rFID reference.
 */
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
{
	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
	u16 vid = mlxsw_sp_port_vlan->vid;

	/* Only sub-port RIFs (rFIDs) are handled by this function. */
	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
		return;

	mlxsw_sp_port_vlan->fid = NULL;
	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
	/* If router port holds the last reference on the rFID, then the
	 * associated Sub-port RIF will be destroyed.
	 */
	mlxsw_sp_fid_put(fid);
}
6380
6381 static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6382                                              struct net_device *port_dev,
6383                                              unsigned long event, u16 vid,
6384                                              struct netlink_ext_ack *extack)
6385 {
6386         struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6387         struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6388
6389         mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6390         if (WARN_ON(!mlxsw_sp_port_vlan))
6391                 return -EINVAL;
6392
6393         switch (event) {
6394         case NETDEV_UP:
6395                 return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6396                                                       l3_dev, extack);
6397         case NETDEV_DOWN:
6398                 mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6399                 break;
6400         }
6401
6402         return 0;
6403 }
6404
6405 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6406                                         unsigned long event,
6407                                         struct netlink_ext_ack *extack)
6408 {
6409         if (netif_is_bridge_port(port_dev) ||
6410             netif_is_lag_port(port_dev) ||
6411             netif_is_ovs_port(port_dev))
6412                 return 0;
6413
6414         return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
6415                                                  extack);
6416 }
6417
6418 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6419                                          struct net_device *lag_dev,
6420                                          unsigned long event, u16 vid,
6421                                          struct netlink_ext_ack *extack)
6422 {
6423         struct net_device *port_dev;
6424         struct list_head *iter;
6425         int err;
6426
6427         netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6428                 if (mlxsw_sp_port_dev_check(port_dev)) {
6429                         err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6430                                                                 port_dev,
6431                                                                 event, vid,
6432                                                                 extack);
6433                         if (err)
6434                                 return err;
6435                 }
6436         }
6437
6438         return 0;
6439 }
6440
/* Handle an inetaddr event on a LAG netdevice. A bridged LAG gets its
 * RIF via the bridge instead; a standalone LAG routes on VID 1.
 */
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
				       unsigned long event,
				       struct netlink_ext_ack *extack)
{
	if (netif_is_bridge_port(lag_dev))
		return 0;

	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
					     extack);
}
6451
/* Create or destroy the RIF backing a bridge (or VLAN upper of a
 * VLAN-aware bridge) netdevice in response to an address event on it.
 */
static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
					  unsigned long event,
					  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	struct mlxsw_sp_rif_params params = {
		.dev = l3_dev,
	};
	struct mlxsw_sp_rif *rif;

	switch (event) {
	case NETDEV_UP:
		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
		if (IS_ERR(rif))
			return PTR_ERR(rif);
		break;
	case NETDEV_DOWN:
		/* Callers gate on mlxsw_sp_rif_should_config(), so a RIF
		 * is expected to exist for the device at this point.
		 */
		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
		mlxsw_sp_rif_destroy(rif);
		break;
	}

	return 0;
}
6476
6477 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
6478                                         unsigned long event,
6479                                         struct netlink_ext_ack *extack)
6480 {
6481         struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6482         u16 vid = vlan_dev_vlan_id(vlan_dev);
6483
6484         if (netif_is_bridge_port(vlan_dev))
6485                 return 0;
6486
6487         if (mlxsw_sp_port_dev_check(real_dev))
6488                 return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6489                                                          event, vid, extack);
6490         else if (netif_is_lag_master(real_dev))
6491                 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6492                                                      vid, extack);
6493         else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6494                 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
6495
6496         return 0;
6497 }
6498
6499 static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6500 {
6501         u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6502         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6503
6504         return ether_addr_equal_masked(mac, vrrp4, mask);
6505 }
6506
6507 static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6508 {
6509         u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6510         u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6511
6512         return ether_addr_equal_masked(mac, vrrp6, mask);
6513 }
6514
/* Program (when @adding) or clear the VRRP ID of a RIF so the device can
 * terminate packets destined to the VRRP virtual MAC. Returns 0 without
 * touching hardware when @mac is not a VRRP virtual MAC.
 */
static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
				const u8 *mac, bool adding)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	u8 vrrp_id = adding ? mac[5] : 0;	/* last MAC octet encodes the VRID */
	int err;

	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
		return 0;

	/* Read-modify-write of the RITR register: fetch the current RIF
	 * configuration, update only the relevant VRRP ID field, write back.
	 */
	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
	else
		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6538
/* Direct a macvlan's MAC address to the router by installing an FDB entry
 * in the FID of the RIF backing the macvlan's lower device, and program
 * the VRRP ID if the MAC is a VRRP virtual MAC. Fails with -EOPNOTSUPP
 * when the lower device has no RIF.
 */
static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
				    const struct net_device *macvlan_dev,
				    struct netlink_ext_ack *extack)
{
	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
	struct mlxsw_sp_rif *rif;
	int err;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
	if (!rif) {
		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
		return -EOPNOTSUPP;
	}

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		return err;

	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
				   macvlan_dev->dev_addr, true);
	if (err)
		goto err_rif_vrrp_add;

	/* Make sure the bridge driver does not have this MAC pointing at
	 * some other port.
	 */
	if (rif->ops->fdb_del)
		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);

	return 0;

err_rif_vrrp_add:
	/* Roll back the FDB entry installed above. */
	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
			    mlxsw_sp_fid_index(rif->fid), false);
	return err;
}
6576
/* Undo mlxsw_sp_rif_macvlan_add(): clear the VRRP ID and remove the
 * macvlan's FDB entry from the FID of the lower device's RIF.
 */
void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
			      const struct net_device *macvlan_dev)
{
	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
	struct mlxsw_sp_rif *rif;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
	/* If we do not have a RIF, then we already took care of
	 * removing the macvlan's MAC during RIF deletion.
	 */
	if (!rif)
		return;
	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
			     false);
	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
			    mlxsw_sp_fid_index(rif->fid), false);
}
6594
6595 static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev,
6596                                            unsigned long event,
6597                                            struct netlink_ext_ack *extack)
6598 {
6599         struct mlxsw_sp *mlxsw_sp;
6600
6601         mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev);
6602         if (!mlxsw_sp)
6603                 return 0;
6604
6605         switch (event) {
6606         case NETDEV_UP:
6607                 return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6608         case NETDEV_DOWN:
6609                 mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6610                 break;
6611         }
6612
6613         return 0;
6614 }
6615
/* Common inetaddr event dispatcher: route the event to the handler that
 * matches the kind of netdevice the address was configured on. Unknown
 * device types are ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack);

	return 0;
}
6633
/* IPv4 inetaddr notifier: reacts to address removal (NETDEV_DOWN) by
 * (de)configuring the RIF of the affected netdevice. Returns a notifier
 * code derived from the handler's errno.
 */
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
			    unsigned long event, void *ptr)
{
	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
	struct net_device *dev = ifa->ifa_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
	if (event == NETDEV_UP)
		goto out;

	/* Ignore devices that are not on top of our hardware. */
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	return notifier_from_errno(err);
}
6659
/* IPv4 address validator notifier: handles NETDEV_UP before the address
 * is actually installed, so failures (e.g. out of RIFs) can veto the
 * address addition and report the reason through extack.
 */
int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
				  unsigned long event, void *ptr)
{
	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
	struct net_device *dev = ivi->ivi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
out:
	return notifier_from_errno(err);
}
6681
/* Deferred-work context for IPv6 address events, which are delivered in
 * atomic context and must be processed in process context instead.
 */
struct mlxsw_sp_inet6addr_event_work {
	struct work_struct work;
	struct net_device *dev;		/* held via dev_hold() until the work runs */
	unsigned long event;		/* NETDEV_* event being deferred */
};
6687
/* Process-context half of the IPv6 inetaddr notifier: performs the same
 * RIF (de)configuration as the IPv4 path, under RTNL. Releases the
 * device reference and the work item taken by mlxsw_sp_inet6addr_event().
 */
static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
{
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
	struct net_device *dev = inet6addr_work->dev;
	unsigned long event = inet6addr_work->event;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;

	rtnl_lock();
	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	__mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
	rtnl_unlock();
	dev_put(dev);			/* pairs with dev_hold() in the notifier */
	kfree(inet6addr_work);
}
6712
/* Called with rcu_read_lock() */
/* IPv6 inetaddr notifier: runs in atomic context, so the actual RIF
 * (de)configuration is deferred to a workqueue. A reference on the
 * netdevice is held until the work item runs.
 */
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
			     unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
	struct net_device *dev = if6->idev->dev;

	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
	if (event == NETDEV_UP)
		return NOTIFY_DONE;

	/* Ignore devices that are not on top of our ports. */
	if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
		return NOTIFY_DONE;

	/* GFP_ATOMIC: we are in atomic (RCU) context here. */
	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
	if (!inet6addr_work)
		return NOTIFY_BAD;

	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
	inet6addr_work->dev = dev;
	inet6addr_work->event = event;
	dev_hold(dev);			/* released by the work item */
	mlxsw_core_schedule_work(&inet6addr_work->work);

	return NOTIFY_DONE;
}
6740
/* IPv6 address validator notifier: handles NETDEV_UP synchronously (this
 * notifier runs in a context where sleeping is allowed, unlike the plain
 * inet6addr notifier), so errors can veto the address via extack.
 */
int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
				   unsigned long event, void *ptr)
{
	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
	struct net_device *dev = i6vi->i6vi_dev->dev;
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	int err = 0;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		goto out;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!mlxsw_sp_rif_should_config(rif, dev, event))
		goto out;

	err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
out:
	return notifier_from_errno(err);
}
6762
/* Update the MAC address and MTU of an existing RIF via a read-modify-
 * write of the RITR register.
 */
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
			     const char *mac, int mtu)
{
	char ritr_pl[MLXSW_REG_RITR_LEN];
	int err;

	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
	if (err)
		return err;

	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6779
/* Sync a RIF after its netdevice changed MAC address and/or MTU:
 * replace the router FDB entry, edit the RIF in hardware and propagate
 * the new MTU to the multicast routing tables. On failure the previous
 * MAC/MTU and FDB entry are restored.
 */
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
{
	struct mlxsw_sp *mlxsw_sp;
	struct mlxsw_sp_rif *rif;
	u16 fid_index;
	int err;

	mlxsw_sp = mlxsw_sp_lower_get(dev);
	if (!mlxsw_sp)
		return 0;

	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
	if (!rif)
		return 0;
	fid_index = mlxsw_sp_fid_index(rif->fid);

	/* Remove the old MAC from the FID before programming the new one. */
	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
	if (err)
		return err;

	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
				dev->mtu);
	if (err)
		goto err_rif_edit;

	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
	if (err)
		goto err_rif_fdb_op;

	if (rif->mtu != dev->mtu) {
		struct mlxsw_sp_vr *vr;
		int i;

		/* The RIF is relevant only to its mr_table instance, as unlike
		 * unicast routing, in multicast routing a RIF cannot be shared
		 * between several multicast routing tables.
		 */
		vr = &mlxsw_sp->router->vrs[rif->vr_id];
		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
						   rif, dev->mtu);
	}

	/* Commit the new values to our cached RIF state only on success. */
	ether_addr_copy(rif->addr, dev->dev_addr);
	rif->mtu = dev->mtu;

	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);

	return 0;

err_rif_fdb_op:
	/* Restore the previous RIF configuration and FDB entry. */
	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
err_rif_edit:
	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
	return err;
}
6836
/* Move a netdevice's RIF into the VRF it was just enslaved to, by
 * recreating the RIF under the new virtual router.
 */
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
				  struct net_device *l3_dev,
				  struct netlink_ext_ack *extack)
{
	struct mlxsw_sp_rif *rif;

	/* If netdev is already associated with a RIF, then we need to
	 * destroy it and create a new one with the new virtual router ID.
	 */
	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
	if (rif)
		__mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);

	return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
}
6852
6853 static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
6854                                     struct net_device *l3_dev)
6855 {
6856         struct mlxsw_sp_rif *rif;
6857
6858         rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6859         if (!rif)
6860                 return;
6861         __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
6862 }
6863
/* Handle a netdevice being linked to / unlinked from a VRF master:
 * recreate its RIF in the new virtual router on linking, destroy it on
 * unlinking.
 */
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
				 struct netdev_notifier_changeupper_info *info)
{
	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
	int err = 0;

	/* We do not create a RIF for a macvlan, but only use it to
	 * direct more MAC addresses to the router.
	 */
	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
		return 0;

	switch (event) {
	case NETDEV_PRECHANGEUPPER:
		/* Nothing to validate before the change. */
		return 0;
	case NETDEV_CHANGEUPPER:
		if (info->linking) {
			struct netlink_ext_ack *extack;

			extack = netdev_notifier_info_to_extack(&info->info);
			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
		} else {
			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
		}
		break;
	}

	return err;
}
6893
6894 static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
6895 {
6896         struct mlxsw_sp_rif *rif = data;
6897
6898         if (!netif_is_macvlan(dev))
6899                 return 0;
6900
6901         return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
6902                                    mlxsw_sp_fid_index(rif->fid), false);
6903 }
6904
/* Remove the FDB entries of all macvlan uppers of the RIF's netdevice.
 * Called during RIF teardown; the macvlans are left in place but will no
 * longer receive traffic, hence the warning.
 */
static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
{
	if (!netif_is_macvlan_port(rif->dev))
		return 0;

	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
	return netdev_walk_all_upper_dev_rcu(rif->dev,
					     __mlxsw_sp_rif_macvlan_flush, rif);
}
6914
/* Downcast a generic RIF to its enclosing sub-port RIF. Only valid for
 * RIFs of type MLXSW_SP_RIF_TYPE_SUBPORT.
 */
static struct mlxsw_sp_rif_subport *
mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_subport, common);
}
6920
6921 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
6922                                        const struct mlxsw_sp_rif_params *params)
6923 {
6924         struct mlxsw_sp_rif_subport *rif_subport;
6925
6926         rif_subport = mlxsw_sp_rif_subport_rif(rif);
6927         rif_subport->vid = params->vid;
6928         rif_subport->lag = params->lag;
6929         if (params->lag)
6930                 rif_subport->lag_id = params->lag_id;
6931         else
6932                 rif_subport->system_port = params->system_port;
6933 }
6934
/* Enable or disable a sub-port RIF in hardware by writing the RITR
 * register with the RIF's MTU, MAC and {port|LAG, VID} binding.
 */
static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_rif_subport *rif_subport;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	rif_subport = mlxsw_sp_rif_subport_rif(rif);
	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
			    rif->rif_index, rif->vr_id, rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
				  rif_subport->lag ? rif_subport->lag_id :
						     rif_subport->system_port,
				  rif_subport->vid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
6952
/* Configure a sub-port RIF: enable it in hardware, install the router
 * FDB entry for its MAC and link the FID back to the RIF. Unwinds the
 * hardware enable on FDB failure.
 */
static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
{
	int err;

	err = mlxsw_sp_rif_subport_op(rif, true);
	if (err)
		return err;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_rif_subport_op(rif, false);
	return err;
}
6973
/* Tear down a sub-port RIF in the reverse order of configure: unlink the
 * FID, remove the router FDB entry, flush macvlan uppers and disable the
 * RIF in hardware.
 */
static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_rif_subport_op(rif, false);
}
6984
/* A sub-port RIF uses a rFID keyed by its own RIF index; @extack is
 * unused here but required by the ops signature.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
			     struct netlink_ext_ack *extack)
{
	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
}
6991
/* Ops for RIFs bound directly to a {port|LAG, VID} pair. No fdb_del:
 * sub-port RIFs are not backed by a bridge FDB.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
	.setup			= mlxsw_sp_rif_subport_setup,
	.configure		= mlxsw_sp_rif_subport_configure,
	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
	.fid_get		= mlxsw_sp_rif_subport_fid_get,
};
7000
/* Enable or disable a VLAN-interface or FID-interface RIF in hardware;
 * @type selects which of the two RITR interface types is programmed and
 * @vid_fid is interpreted accordingly (VID or FID index).
 */
static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
				    enum mlxsw_reg_ritr_if_type type,
				    u16 vid_fid, bool enable)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	char ritr_pl[MLXSW_REG_RITR_LEN];

	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
			    rif->dev->mtu);
	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
7015
/* Return the local port number used to represent the router itself, one
 * past the maximal port number of the device; used as the flood-table
 * member that directs flooded traffic to the router.
 */
u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
7020
/* Configure a VLAN-interface RIF: enable it in hardware, add the router
 * port to the FID's MC and BC flood tables, install the router FDB entry
 * and link the FID back to the RIF. Each step is unwound in reverse
 * order on failure.
 */
static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
	return err;
}
7059
/* Tear down a VLAN-interface RIF in the exact reverse order of
 * mlxsw_sp_rif_vlan_configure(), with an additional macvlan flush.
 */
static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
}
7076
/* Resolve the 802.1Q FID for a VLAN-type RIF: the VLAN ID comes from the
 * VLAN device itself, or from the bridge PVID when the RIF's device is
 * the (VLAN-aware) bridge.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
			  struct netlink_ext_ack *extack)
{
	u16 vid;
	int err;

	if (is_vlan_dev(rif->dev)) {
		vid = vlan_dev_vlan_id(rif->dev);
	} else {
		err = br_vlan_get_pvid(rif->dev, &vid);
		if (err < 0 || !vid) {
			/* No PVID (or lookup failure) means there is no VLAN
			 * to route on.
			 */
			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
			return ERR_PTR(-EINVAL);
		}
	}

	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
}
7096
/* Ask the bridge driver to delete its FDB entry for @mac in the RIF's
 * VLAN, so the bridge does not keep the MAC pointed at some port.
 * No-op when the bridge has no entry for the MAC/VID pair.
 */
static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
{
	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
	struct switchdev_notifier_fdb_info info;
	struct net_device *br_dev;
	struct net_device *dev;

	/* The bridge is either the RIF's device or the real device under
	 * the RIF's VLAN device.
	 */
	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
	dev = br_fdb_find_port(br_dev, mac, vid);
	if (!dev)
		return;

	info.addr = mac;
	info.vid = vid;
	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
}
7113
/* Ops for RIFs on top of a VLAN of a VLAN-aware bridge. No setup hook:
 * the generic RIF carries no extra per-type state.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
	.type			= MLXSW_SP_RIF_TYPE_VLAN,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_vlan_configure,
	.deconfigure		= mlxsw_sp_rif_vlan_deconfigure,
	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
};
7122
/* Configure a FID-interface RIF (VLAN-unaware bridge): same sequence as
 * the VLAN variant — enable RIF, add router port to MC/BC flood tables,
 * install router FDB entry, link FID to RIF — keyed by FID index instead
 * of VID. Unwound in reverse order on failure.
 */
static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	int err;

	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
				       true);
	if (err)
		return err;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_mc_flood_set;

	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
				     mlxsw_sp_router_port(mlxsw_sp), true);
	if (err)
		goto err_fid_bc_flood_set;

	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
				  mlxsw_sp_fid_index(rif->fid), true);
	if (err)
		goto err_rif_fdb_op;

	mlxsw_sp_fid_rif_set(rif->fid, rif);
	return 0;

err_rif_fdb_op:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_bc_flood_set:
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
err_fid_mc_flood_set:
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
	return err;
}
7162
/* Tear down a FID-interface RIF in the exact reverse order of
 * mlxsw_sp_rif_fid_configure(), with an additional macvlan flush.
 */
static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
{
	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_fid *fid = rif->fid;

	mlxsw_sp_fid_rif_set(fid, NULL);
	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
			    mlxsw_sp_fid_index(fid), false);
	mlxsw_sp_rif_macvlan_flush(rif);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
			       mlxsw_sp_router_port(mlxsw_sp), false);
	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
}
7179
/* A FID-type RIF uses an 802.1D FID keyed by the netdevice's ifindex;
 * @extack is unused here but required by the ops signature.
 */
static struct mlxsw_sp_fid *
mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
			 struct netlink_ext_ack *extack)
{
	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
}
7186
/* Ask the bridge driver to delete its FDB entry for @mac on the RIF's
 * (VLAN-unaware) bridge device; VID 0 as the bridge is VLAN-unaware.
 */
static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
{
	struct switchdev_notifier_fdb_info info;
	struct net_device *dev;

	dev = br_fdb_find_port(rif->dev, mac, 0);
	if (!dev)
		return;

	info.addr = mac;
	info.vid = 0;
	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
}
7200
/* Ops for RIFs on top of a VLAN-unaware bridge. No setup hook: the
 * generic RIF carries no extra per-type state.
 */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
	.type			= MLXSW_SP_RIF_TYPE_FID,
	.rif_size		= sizeof(struct mlxsw_sp_rif),
	.configure		= mlxsw_sp_rif_fid_configure,
	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
	.fid_get		= mlxsw_sp_rif_fid_fid_get,
	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
};
7209
/* Downcast a generic RIF to its enclosing IP-in-IP loopback RIF. Only
 * valid for RIFs of type MLXSW_SP_RIF_TYPE_IPIP_LB.
 */
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
{
	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
}
7215
/* Copy the loopback configuration from the IP-in-IP RIF parameters into
 * the loopback RIF's private area.
 */
static void
mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
			   const struct mlxsw_sp_rif_params *params)
{
	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
	struct mlxsw_sp_rif_ipip_lb *rif_lb;

	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
				 common);
	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
	rif_lb->lb_config = params_lb->lb_config;
}
7228
/* Configure an IP-in-IP loopback RIF: take a reference on the underlay
 * virtual router derived from the tunnel device's underlay table ID,
 * program the loopback in hardware and account the RIF on the VR.
 */
static int
mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;
	int err;

	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
	if (IS_ERR(ul_vr))
		return PTR_ERR(ul_vr);

	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true);
	if (err)
		goto err_loopback_op;

	/* Remember the underlay VR so deconfigure can find it again. */
	lb_rif->ul_vr_id = ul_vr->id;
	++ul_vr->rif_count;
	return 0;

err_loopback_op:
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
	return err;
}
7254
/* Mirror of mlxsw_sp_rif_ipip_lb_configure(): disable the loopback in HW
 * and drop the reference taken on the underlay virtual router.
 */
static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
{
	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
	struct mlxsw_sp_vr *ul_vr;

	/* The VR ID was recorded at configure time. */
	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);

	--ul_vr->rif_count;
	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
7267
/* Operations for the loopback RIF backing IP-in-IP tunnel decap. */
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
	.configure		= mlxsw_sp_rif_ipip_lb_configure,
	.deconfigure		= mlxsw_sp_rif_ipip_lb_deconfigure,
};
7275
/* RIF operations, indexed by enum mlxsw_sp_rif_type. */
static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = {
	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_ops,
	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp_rif_ipip_lb_ops,
};
7282
7283 static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7284 {
7285         u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7286
7287         mlxsw_sp->router->rifs = kcalloc(max_rifs,
7288                                          sizeof(struct mlxsw_sp_rif *),
7289                                          GFP_KERNEL);
7290         if (!mlxsw_sp->router->rifs)
7291                 return -ENOMEM;
7292
7293         mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr;
7294
7295         return 0;
7296 }
7297
7298 static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7299 {
7300         int i;
7301
7302         for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7303                 WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7304
7305         kfree(mlxsw_sp->router->rifs);
7306 }
7307
/* Program TIGCR so that tunneled packets inherit the TTL from the
 * overlay packet (ttlc=true, ttl value 0 unused).
 */
static int
mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
{
	char tigcr_pl[MLXSW_REG_TIGCR_LEN];

	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
}
7316
/* Initialize IP-in-IP support: set up the tunnel ops array and the list
 * of offloaded tunnel entries, then configure global tunneling (TIGCR).
 */
static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
{
	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
}
7323
/* All offloaded tunnels should be gone by teardown; warn otherwise. */
static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
{
	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
}
7328
/* Callback invoked by the FIB notifier core when the initial FIB dump
 * fails and must be retried from a clean slate.
 */
static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
{
	struct mlxsw_sp_router *router;

	/* Flush pending FIB notifications and then flush the device's
	 * table before requesting another dump. The FIB notification
	 * block is unregistered, so no need to take RTNL.
	 */
	mlxsw_core_flush_owq();
	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
}
7341
7342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Enable ECMP hashing for the given outer header type in RECR2. */
static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
{
	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
}
7347
/* Include the given outer header field in the ECMP hash in RECR2. */
static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
{
	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
}
7352
7353 static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7354 {
7355         bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7356
7357         mlxsw_sp_mp_hash_header_set(recr2_pl,
7358                                     MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7359         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7360         mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7361         mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7362         if (only_l3)
7363                 return;
7364         mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7365         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7366         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7367         mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7368 }
7369
/* Program the IPv6 ECMP hash fields into RECR2, mirroring the kernel's
 * IPv6 multipath hash policy: L3-only hashes SIP/DIP/next-header plus
 * the flow label, while the L4 policy hashes TCP/UDP ports instead.
 */
static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
{
	bool only_l3 = !ip6_multipath_hash_policy(&init_net);

	mlxsw_sp_mp_hash_header_set(recr2_pl,
				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
	if (only_l3) {
		/* L3 policy folds the flow label into the hash. */
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
	} else {
		/* L4 policy hashes transport ports instead. */
		mlxsw_sp_mp_hash_header_set(recr2_pl,
					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
		mlxsw_sp_mp_hash_field_set(recr2_pl,
					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
	}
}
7392
/* Configure the ECMP hash (RECR2) with a random seed and the IPv4/IPv6
 * field selections matching the kernel's multipath hash policies.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	char recr2_pl[MLXSW_REG_RECR2_LEN];
	u32 seed;

	/* Randomize the seed so that hash polarization between devices
	 * is avoided across reboots.
	 */
	get_random_bytes(&seed, sizeof(seed));
	mlxsw_reg_recr2_pack(recr2_pl, seed);
	mlxsw_sp_mp4_hash_init(recr2_pl);
	mlxsw_sp_mp6_hash_init(recr2_pl);

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
}
#else
/* Without CONFIG_IP_ROUTE_MULTIPATH there is no ECMP hash to program. */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
#endif
7411
/* Program the DSCP-to-switch-priority map (RDPM) to match the kernel's
 * ToS-based priority mapping.
 */
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
	char rdpm_pl[MLXSW_REG_RDPM_LEN];
	unsigned int i;

	MLXSW_REG_ZERO(rdpm, rdpm_pl);

	/* HW is determining switch priority based on DSCP-bits, but the
	 * kernel is still doing that based on the ToS. Since there's a
	 * mismatch in bits we need to make sure to translate the right
	 * value ToS would observe, skipping the 2 least-significant ECN bits.
	 */
	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));

	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
7429
7430 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7431 {
7432         bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7433         char rgcr_pl[MLXSW_REG_RGCR_LEN];
7434         u64 max_rifs;
7435         int err;
7436
7437         if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7438                 return -EIO;
7439         max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7440
7441         mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7442         mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7443         mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7444         err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7445         if (err)
7446                 return err;
7447         return 0;
7448 }
7449
/* Disable the HW router (IPv4 and IPv6) via RGCR. */
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	char rgcr_pl[MLXSW_REG_RGCR_LEN];

	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
}
7457
/* Top-level router initialization: allocate the router context and bring
 * up every router sub-module in dependency order, finishing by
 * registering the netevent and FIB notifiers so events start flowing
 * only once everything they touch is ready. On failure, the goto ladder
 * unwinds in exact reverse order of initialization.
 */
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
	struct mlxsw_sp_router *router;
	int err;

	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
	if (!router)
		return -ENOMEM;
	mlxsw_sp->router = router;
	router->mlxsw_sp = mlxsw_sp;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
	err = __mlxsw_sp_router_init(mlxsw_sp);
	if (err)
		goto err_router_init;

	err = mlxsw_sp_rifs_init(mlxsw_sp);
	if (err)
		goto err_rifs_init;

	err = mlxsw_sp_ipips_init(mlxsw_sp);
	if (err)
		goto err_ipips_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
			      &mlxsw_sp_nexthop_ht_params);
	if (err)
		goto err_nexthop_ht_init;

	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
			      &mlxsw_sp_nexthop_group_ht_params);
	if (err)
		goto err_nexthop_group_ht_init;

	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
	err = mlxsw_sp_lpm_init(mlxsw_sp);
	if (err)
		goto err_lpm_init;

	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
	if (err)
		goto err_mr_init;

	err = mlxsw_sp_vrs_init(mlxsw_sp);
	if (err)
		goto err_vrs_init;

	err = mlxsw_sp_neigh_init(mlxsw_sp);
	if (err)
		goto err_neigh_init;

	/* Notifiers are registered last so that events only arrive once
	 * all the state they operate on has been initialized.
	 */
	mlxsw_sp->router->netevent_nb.notifier_call =
		mlxsw_sp_router_netevent_event;
	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	if (err)
		goto err_register_netevent_notifier;

	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
	if (err)
		goto err_mp_hash_init;

	err = mlxsw_sp_dscp_init(mlxsw_sp);
	if (err)
		goto err_dscp_init;

	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
	err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
				    mlxsw_sp_router_fib_dump_flush);
	if (err)
		goto err_register_fib_notifier;

	return 0;

err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
	mlxsw_sp_neigh_fini(mlxsw_sp);
err_neigh_init:
	mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
	mlxsw_sp_mr_fini(mlxsw_sp);
err_mr_init:
	mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
err_nexthop_group_ht_init:
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
err_nexthop_ht_init:
	mlxsw_sp_ipips_fini(mlxsw_sp);
err_ipips_init:
	mlxsw_sp_rifs_fini(mlxsw_sp);
err_rifs_init:
	__mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
	kfree(mlxsw_sp->router);
	return err;
}
7557
/* Tear down the router: unregister notifiers first so no new events
 * arrive, then release the sub-modules in exact reverse order of
 * mlxsw_sp_router_init().
 */
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
	unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
	mlxsw_sp_neigh_fini(mlxsw_sp);
	mlxsw_sp_vrs_fini(mlxsw_sp);
	mlxsw_sp_mr_fini(mlxsw_sp);
	mlxsw_sp_lpm_fini(mlxsw_sp);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
	mlxsw_sp_ipips_fini(mlxsw_sp);
	mlxsw_sp_rifs_fini(mlxsw_sp);
	__mlxsw_sp_router_fini(mlxsw_sp);
	kfree(mlxsw_sp->router);
}