GNU Linux-libre 6.8.7-gnu
[releases.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005 - 2016 Broadcom
4  * All rights reserved.
5  *
6  * Contact Information:
7  * linux-drivers@emulex.com
8  *
9  * Emulex
10  * 3333 Susan Street
11  * Costa Mesa, CA 92626
12  */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/if_bridge.h>
20 #include <net/busy_poll.h>
21 #include <net/vxlan.h>
22
23 MODULE_DESCRIPTION(DRV_DESC);
24 MODULE_AUTHOR("Emulex Corporation");
25 MODULE_LICENSE("GPL");
26
27 /* num_vfs module param is obsolete.
28  * Use sysfs method to enable/disable VFs.
29  */
30 static unsigned int num_vfs;
31 module_param(num_vfs, uint, 0444);
32 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
33
34 static ushort rx_frag_size = 2048;
35 module_param(rx_frag_size, ushort, 0444);
36 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
37
38 /* Per-module error detection/recovery workq shared across all functions.
39  * Each function schedules its own work request on this shared workq.
40  */
41 static struct workqueue_struct *be_err_recovery_workq;
42
43 static const struct pci_device_id be_dev_ids[] = {
44 #ifdef CONFIG_BE2NET_BE2
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
47 #endif /* CONFIG_BE2NET_BE2 */
48 #ifdef CONFIG_BE2NET_BE3
49         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
50         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
51 #endif /* CONFIG_BE2NET_BE3 */
52 #ifdef CONFIG_BE2NET_LANCER
53         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
55 #endif /* CONFIG_BE2NET_LANCER */
56 #ifdef CONFIG_BE2NET_SKYHAWK
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
58         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
59 #endif /* CONFIG_BE2NET_SKYHAWK */
60         { 0 }
61 };
62 MODULE_DEVICE_TABLE(pci, be_dev_ids);
63
64 /* Workqueue used by all functions for defering cmd calls to the adapter */
65 static struct workqueue_struct *be_wq;
66
67 /* UE Status Low CSR */
68 static const char * const ue_status_low_desc[] = {
69         "CEV",
70         "CTX",
71         "DBUF",
72         "ERX",
73         "Host",
74         "MPU",
75         "NDMA",
76         "PTC ",
77         "RDMA ",
78         "RXF ",
79         "RXIPS ",
80         "RXULP0 ",
81         "RXULP1 ",
82         "RXULP2 ",
83         "TIM ",
84         "TPOST ",
85         "TPRE ",
86         "TXIPS ",
87         "TXULP0 ",
88         "TXULP1 ",
89         "UC ",
90         "WDMA ",
91         "TXULP2 ",
92         "HOST1 ",
93         "P0_OB_LINK ",
94         "P1_OB_LINK ",
95         "HOST_GPIO ",
96         "MBOX ",
97         "ERX2 ",
98         "SPARE ",
99         "JTAG ",
100         "MPU_INTPEND "
101 };
102
103 /* UE Status High CSR */
104 static const char * const ue_status_hi_desc[] = {
105         "LPCMEMHOST",
106         "MGMT_MAC",
107         "PCS0ONLINE",
108         "MPU_IRAM",
109         "PCS1ONLINE",
110         "PCTL0",
111         "PCTL1",
112         "PMEM",
113         "RR",
114         "TXPB",
115         "RXPP",
116         "XAUI",
117         "TXP",
118         "ARM",
119         "IPC",
120         "HOST2",
121         "HOST3",
122         "HOST4",
123         "HOST5",
124         "HOST6",
125         "HOST7",
126         "ECRC",
127         "Poison TLP",
128         "NETC",
129         "PERIPH",
130         "LLTXULP",
131         "D2P",
132         "RCON",
133         "LDMA",
134         "LLTXP",
135         "LLTXPB",
136         "Unknown"
137 };
138
139 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
140                                  BE_IF_FLAGS_BROADCAST | \
141                                  BE_IF_FLAGS_MULTICAST | \
142                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
143
144 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
145 {
146         struct be_dma_mem *mem = &q->dma_mem;
147
148         if (mem->va) {
149                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
150                                   mem->dma);
151                 mem->va = NULL;
152         }
153 }
154
155 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
156                           u16 len, u16 entry_size)
157 {
158         struct be_dma_mem *mem = &q->dma_mem;
159
160         memset(q, 0, sizeof(*q));
161         q->len = len;
162         q->entry_size = entry_size;
163         mem->size = len * entry_size;
164         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
165                                      &mem->dma, GFP_KERNEL);
166         if (!mem->va)
167                 return -ENOMEM;
168         return 0;
169 }
170
171 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
172 {
173         u32 reg, enabled;
174
175         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
176                               &reg);
177         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
178
179         if (!enabled && enable)
180                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else if (enabled && !enable)
182                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
183         else
184                 return;
185
186         pci_write_config_dword(adapter->pdev,
187                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
188 }
189
190 static void be_intr_set(struct be_adapter *adapter, bool enable)
191 {
192         int status = 0;
193
194         /* On lancer interrupts can't be controlled via this register */
195         if (lancer_chip(adapter))
196                 return;
197
198         if (be_check_error(adapter, BE_ERROR_EEH))
199                 return;
200
201         status = be_cmd_intr_set(adapter, enable);
202         if (status)
203                 be_reg_intr_set(adapter, enable);
204 }
205
206 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
207 {
208         u32 val = 0;
209
210         if (be_check_error(adapter, BE_ERROR_HW))
211                 return;
212
213         val |= qid & DB_RQ_RING_ID_MASK;
214         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
215
216         wmb();
217         iowrite32(val, adapter->db + DB_RQ_OFFSET);
218 }
219
220 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
221                           u16 posted)
222 {
223         u32 val = 0;
224
225         if (be_check_error(adapter, BE_ERROR_HW))
226                 return;
227
228         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
229         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
230
231         wmb();
232         iowrite32(val, adapter->db + txo->db_offset);
233 }
234
235 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
236                          bool arm, bool clear_int, u16 num_popped,
237                          u32 eq_delay_mult_enc)
238 {
239         u32 val = 0;
240
241         val |= qid & DB_EQ_RING_ID_MASK;
242         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
243
244         if (be_check_error(adapter, BE_ERROR_HW))
245                 return;
246
247         if (arm)
248                 val |= 1 << DB_EQ_REARM_SHIFT;
249         if (clear_int)
250                 val |= 1 << DB_EQ_CLR_SHIFT;
251         val |= 1 << DB_EQ_EVNT_SHIFT;
252         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
253         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
254         iowrite32(val, adapter->db + DB_EQ_OFFSET);
255 }
256
257 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
258 {
259         u32 val = 0;
260
261         val |= qid & DB_CQ_RING_ID_MASK;
262         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
263                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
264
265         if (be_check_error(adapter, BE_ERROR_HW))
266                 return;
267
268         if (arm)
269                 val |= 1 << DB_CQ_REARM_SHIFT;
270         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
271         iowrite32(val, adapter->db + DB_CQ_OFFSET);
272 }
273
274 static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
275 {
276         int i;
277
278         /* Check if mac has already been added as part of uc-list */
279         for (i = 0; i < adapter->uc_macs; i++) {
280                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
281                         /* mac already added, skip addition */
282                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
283                         return 0;
284                 }
285         }
286
287         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
288                                &adapter->pmac_id[0], 0);
289 }
290
291 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
292 {
293         int i;
294
295         /* Skip deletion if the programmed mac is
296          * being used in uc-list
297          */
298         for (i = 0; i < adapter->uc_macs; i++) {
299                 if (adapter->pmac_id[i + 1] == pmac_id)
300                         return;
301         }
302         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
303 }
304
305 static int be_mac_addr_set(struct net_device *netdev, void *p)
306 {
307         struct be_adapter *adapter = netdev_priv(netdev);
308         struct device *dev = &adapter->pdev->dev;
309         struct sockaddr *addr = p;
310         int status;
311         u8 mac[ETH_ALEN];
312         u32 old_pmac_id = adapter->pmac_id[0];
313
314         if (!is_valid_ether_addr(addr->sa_data))
315                 return -EADDRNOTAVAIL;
316
317         /* Proceed further only if, User provided MAC is different
318          * from active MAC
319          */
320         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
321                 return 0;
322
323         /* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC
324          * address
325          */
326         if (BEx_chip(adapter) && be_virtfn(adapter) &&
327             !check_privilege(adapter, BE_PRIV_FILTMGMT))
328                 return -EPERM;
329
330         /* if device is not running, copy MAC to netdev->dev_addr */
331         if (!netif_running(netdev))
332                 goto done;
333
334         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
335          * privilege or if PF did not provision the new MAC address.
336          * On BE3, this cmd will always fail if the VF doesn't have the
337          * FILTMGMT privilege. This failure is OK, only if the PF programmed
338          * the MAC for the VF.
339          */
340         mutex_lock(&adapter->rx_filter_lock);
341         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
342         if (!status) {
343
344                 /* Delete the old programmed MAC. This call may fail if the
345                  * old MAC was already deleted by the PF driver.
346                  */
347                 if (adapter->pmac_id[0] != old_pmac_id)
348                         be_dev_mac_del(adapter, old_pmac_id);
349         }
350
351         mutex_unlock(&adapter->rx_filter_lock);
352         /* Decide if the new MAC is successfully activated only after
353          * querying the FW
354          */
355         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
356                                        adapter->if_handle, true, 0);
357         if (status)
358                 goto err;
359
360         /* The MAC change did not happen, either due to lack of privilege
361          * or PF didn't pre-provision.
362          */
363         if (!ether_addr_equal(addr->sa_data, mac)) {
364                 status = -EPERM;
365                 goto err;
366         }
367
368         /* Remember currently programmed MAC */
369         ether_addr_copy(adapter->dev_mac, addr->sa_data);
370 done:
371         eth_hw_addr_set(netdev, addr->sa_data);
372         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
373         return 0;
374 err:
375         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
376         return status;
377 }
378
379 /* BE2 supports only v0 cmd */
380 static void *hw_stats_from_cmd(struct be_adapter *adapter)
381 {
382         if (BE2_chip(adapter)) {
383                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
384
385                 return &cmd->hw_stats;
386         } else if (BE3_chip(adapter)) {
387                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
388
389                 return &cmd->hw_stats;
390         } else {
391                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
392
393                 return &cmd->hw_stats;
394         }
395 }
396
397 /* BE2 supports only v0 cmd */
398 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
399 {
400         if (BE2_chip(adapter)) {
401                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
402
403                 return &hw_stats->erx;
404         } else if (BE3_chip(adapter)) {
405                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
406
407                 return &hw_stats->erx;
408         } else {
409                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
410
411                 return &hw_stats->erx;
412         }
413 }
414
415 static void populate_be_v0_stats(struct be_adapter *adapter)
416 {
417         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
418         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
419         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
420         struct be_port_rxf_stats_v0 *port_stats =
421                                         &rxf_stats->port[adapter->port_num];
422         struct be_drv_stats *drvs = &adapter->drv_stats;
423
424         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
435         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
436         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
437         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
438         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
439         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
440         drvs->rx_dropped_header_too_small =
441                 port_stats->rx_dropped_header_too_small;
442         drvs->rx_address_filtered =
443                                         port_stats->rx_address_filtered +
444                                         port_stats->rx_vlan_filtered;
445         drvs->rx_alignment_symbol_errors =
446                 port_stats->rx_alignment_symbol_errors;
447
448         drvs->tx_pauseframes = port_stats->tx_pauseframes;
449         drvs->tx_controlframes = port_stats->tx_controlframes;
450
451         if (adapter->port_num)
452                 drvs->jabber_events = rxf_stats->port1_jabber_events;
453         else
454                 drvs->jabber_events = rxf_stats->port0_jabber_events;
455         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
456         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
457         drvs->forwarded_packets = rxf_stats->forwarded_packets;
458         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
459         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
460         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
461         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
462 }
463
464 static void populate_be_v1_stats(struct be_adapter *adapter)
465 {
466         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
467         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
468         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
469         struct be_port_rxf_stats_v1 *port_stats =
470                                         &rxf_stats->port[adapter->port_num];
471         struct be_drv_stats *drvs = &adapter->drv_stats;
472
473         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
474         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
475         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
476         drvs->rx_pause_frames = port_stats->rx_pause_frames;
477         drvs->rx_crc_errors = port_stats->rx_crc_errors;
478         drvs->rx_control_frames = port_stats->rx_control_frames;
479         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
480         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
481         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
482         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
483         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
484         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
485         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
486         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
487         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
488         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
489         drvs->rx_dropped_header_too_small =
490                 port_stats->rx_dropped_header_too_small;
491         drvs->rx_input_fifo_overflow_drop =
492                 port_stats->rx_input_fifo_overflow_drop;
493         drvs->rx_address_filtered = port_stats->rx_address_filtered;
494         drvs->rx_alignment_symbol_errors =
495                 port_stats->rx_alignment_symbol_errors;
496         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
497         drvs->tx_pauseframes = port_stats->tx_pauseframes;
498         drvs->tx_controlframes = port_stats->tx_controlframes;
499         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
500         drvs->jabber_events = port_stats->jabber_events;
501         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
502         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
503         drvs->forwarded_packets = rxf_stats->forwarded_packets;
504         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
505         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
506         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
507         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
508 }
509
510 static void populate_be_v2_stats(struct be_adapter *adapter)
511 {
512         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
513         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
514         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
515         struct be_port_rxf_stats_v2 *port_stats =
516                                         &rxf_stats->port[adapter->port_num];
517         struct be_drv_stats *drvs = &adapter->drv_stats;
518
519         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
520         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
521         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
522         drvs->rx_pause_frames = port_stats->rx_pause_frames;
523         drvs->rx_crc_errors = port_stats->rx_crc_errors;
524         drvs->rx_control_frames = port_stats->rx_control_frames;
525         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
526         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
527         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
528         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
529         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
530         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
531         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
532         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
533         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
534         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
535         drvs->rx_dropped_header_too_small =
536                 port_stats->rx_dropped_header_too_small;
537         drvs->rx_input_fifo_overflow_drop =
538                 port_stats->rx_input_fifo_overflow_drop;
539         drvs->rx_address_filtered = port_stats->rx_address_filtered;
540         drvs->rx_alignment_symbol_errors =
541                 port_stats->rx_alignment_symbol_errors;
542         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
543         drvs->tx_pauseframes = port_stats->tx_pauseframes;
544         drvs->tx_controlframes = port_stats->tx_controlframes;
545         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
546         drvs->jabber_events = port_stats->jabber_events;
547         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
548         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
549         drvs->forwarded_packets = rxf_stats->forwarded_packets;
550         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
551         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
552         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
553         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
554         if (be_roce_supported(adapter)) {
555                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
556                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
557                 drvs->rx_roce_frames = port_stats->roce_frames_received;
558                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
559                 drvs->roce_drops_payload_len =
560                         port_stats->roce_drops_payload_len;
561         }
562 }
563
564 static void populate_lancer_stats(struct be_adapter *adapter)
565 {
566         struct be_drv_stats *drvs = &adapter->drv_stats;
567         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
568
569         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
570         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
571         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
572         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
573         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
574         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
575         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
576         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
577         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
578         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
579         drvs->rx_dropped_tcp_length =
580                                 pport_stats->rx_dropped_invalid_tcp_length;
581         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
582         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
583         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
584         drvs->rx_dropped_header_too_small =
585                                 pport_stats->rx_dropped_header_too_small;
586         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
587         drvs->rx_address_filtered =
588                                         pport_stats->rx_address_filtered +
589                                         pport_stats->rx_vlan_filtered;
590         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
591         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
592         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
593         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
594         drvs->jabber_events = pport_stats->rx_jabbers;
595         drvs->forwarded_packets = pport_stats->num_forwards_lo;
596         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
597         drvs->rx_drops_too_many_frags =
598                                 pport_stats->rx_drops_too_many_frags_lo;
599 }
600
601 static void accumulate_16bit_val(u32 *acc, u16 val)
602 {
603 #define lo(x)                   (x & 0xFFFF)
604 #define hi(x)                   (x & 0xFFFF0000)
605         bool wrapped = val < lo(*acc);
606         u32 newacc = hi(*acc) + val;
607
608         if (wrapped)
609                 newacc += 65536;
610         WRITE_ONCE(*acc, newacc);
611 }
612
613 static void populate_erx_stats(struct be_adapter *adapter,
614                                struct be_rx_obj *rxo, u32 erx_stat)
615 {
616         if (!BEx_chip(adapter))
617                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
618         else
619                 /* below erx HW counter can actually wrap around after
620                  * 65535. Driver accumulates a 32-bit value
621                  */
622                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
623                                      (u16)erx_stat);
624 }
625
626 void be_parse_stats(struct be_adapter *adapter)
627 {
628         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
629         struct be_rx_obj *rxo;
630         int i;
631         u32 erx_stat;
632
633         if (lancer_chip(adapter)) {
634                 populate_lancer_stats(adapter);
635         } else {
636                 if (BE2_chip(adapter))
637                         populate_be_v0_stats(adapter);
638                 else if (BE3_chip(adapter))
639                         /* for BE3 */
640                         populate_be_v1_stats(adapter);
641                 else
642                         populate_be_v2_stats(adapter);
643
644                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
645                 for_all_rx_queues(adapter, rxo, i) {
646                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
647                         populate_erx_stats(adapter, rxo, erx_stat);
648                 }
649         }
650 }
651
652 static void be_get_stats64(struct net_device *netdev,
653                            struct rtnl_link_stats64 *stats)
654 {
655         struct be_adapter *adapter = netdev_priv(netdev);
656         struct be_drv_stats *drvs = &adapter->drv_stats;
657         struct be_rx_obj *rxo;
658         struct be_tx_obj *txo;
659         u64 pkts, bytes;
660         unsigned int start;
661         int i;
662
663         for_all_rx_queues(adapter, rxo, i) {
664                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
665
666                 do {
667                         start = u64_stats_fetch_begin(&rx_stats->sync);
668                         pkts = rx_stats(rxo)->rx_pkts;
669                         bytes = rx_stats(rxo)->rx_bytes;
670                 } while (u64_stats_fetch_retry(&rx_stats->sync, start));
671                 stats->rx_packets += pkts;
672                 stats->rx_bytes += bytes;
673                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
674                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
675                                         rx_stats(rxo)->rx_drops_no_frags;
676         }
677
678         for_all_tx_queues(adapter, txo, i) {
679                 const struct be_tx_stats *tx_stats = tx_stats(txo);
680
681                 do {
682                         start = u64_stats_fetch_begin(&tx_stats->sync);
683                         pkts = tx_stats(txo)->tx_pkts;
684                         bytes = tx_stats(txo)->tx_bytes;
685                 } while (u64_stats_fetch_retry(&tx_stats->sync, start));
686                 stats->tx_packets += pkts;
687                 stats->tx_bytes += bytes;
688         }
689
690         /* bad pkts received */
691         stats->rx_errors = drvs->rx_crc_errors +
692                 drvs->rx_alignment_symbol_errors +
693                 drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long +
696                 drvs->rx_dropped_too_small +
697                 drvs->rx_dropped_too_short +
698                 drvs->rx_dropped_header_too_small +
699                 drvs->rx_dropped_tcp_length +
700                 drvs->rx_dropped_runt;
701
702         /* detailed rx errors */
703         stats->rx_length_errors = drvs->rx_in_range_errors +
704                 drvs->rx_out_range_errors +
705                 drvs->rx_frame_too_long;
706
707         stats->rx_crc_errors = drvs->rx_crc_errors;
708
709         /* frame alignment errors */
710         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
711
712         /* receiver fifo overrun */
713         /* drops_no_pbuf is no per i/f, it's per BE card */
714         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
715                                 drvs->rx_input_fifo_overflow_drop +
716                                 drvs->rx_drops_no_pbuf;
717 }
718
719 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
720 {
721         struct net_device *netdev = adapter->netdev;
722
723         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
724                 netif_carrier_off(netdev);
725                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
726         }
727
728         if (link_status)
729                 netif_carrier_on(netdev);
730         else
731                 netif_carrier_off(netdev);
732
733         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
734 }
735
736 static int be_gso_hdr_len(struct sk_buff *skb)
737 {
738         if (skb->encapsulation)
739                 return skb_inner_tcp_all_headers(skb);
740
741         return skb_tcp_all_headers(skb);
742 }
743
744 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
745 {
746         struct be_tx_stats *stats = tx_stats(txo);
747         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
748         /* Account for headers which get duplicated in TSO pkt */
749         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
750
751         u64_stats_update_begin(&stats->sync);
752         stats->tx_reqs++;
753         stats->tx_bytes += skb->len + dup_hdr_len;
754         stats->tx_pkts += tx_pkts;
755         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
756                 stats->tx_vxlan_offload_pkts += tx_pkts;
757         u64_stats_update_end(&stats->sync);
758 }
759
760 /* Returns number of WRBs needed for the skb */
761 static u32 skb_wrb_cnt(struct sk_buff *skb)
762 {
763         /* +1 for the header wrb */
764         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
765 }
766
767 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
768 {
769         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
770         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
771         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
772         wrb->rsvd0 = 0;
773 }
774
775 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
776  * to avoid the swap and shift/mask operations in wrb_fill().
777  */
778 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
779 {
780         wrb->frag_pa_hi = 0;
781         wrb->frag_pa_lo = 0;
782         wrb->frag_len = 0;
783         wrb->rsvd0 = 0;
784 }
785
786 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
787                                      struct sk_buff *skb)
788 {
789         u8 vlan_prio;
790         u16 vlan_tag;
791
792         vlan_tag = skb_vlan_tag_get(skb);
793         vlan_prio = skb_vlan_tag_get_prio(skb);
794         /* If vlan priority provided by OS is NOT in available bmap */
795         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
796                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
797                                 adapter->recommended_prio_bits;
798
799         return vlan_tag;
800 }
801
802 /* Used only for IP tunnel packets */
803 static u16 skb_inner_ip_proto(struct sk_buff *skb)
804 {
805         return (inner_ip_hdr(skb)->version == 4) ?
806                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
807 }
808
809 static u16 skb_ip_proto(struct sk_buff *skb)
810 {
811         return (ip_hdr(skb)->version == 4) ?
812                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
813 }
814
815 static inline bool be_is_txq_full(struct be_tx_obj *txo)
816 {
817         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
818 }
819
820 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
821 {
822         return atomic_read(&txo->q.used) < txo->q.len / 2;
823 }
824
825 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
826 {
827         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
828 }
829
830 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
831                                        struct sk_buff *skb,
832                                        struct be_wrb_params *wrb_params)
833 {
834         u16 proto;
835
836         if (skb_is_gso(skb)) {
837                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
838                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
839                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
840                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
841         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
842                 if (skb->encapsulation) {
843                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
844                         proto = skb_inner_ip_proto(skb);
845                 } else {
846                         proto = skb_ip_proto(skb);
847                 }
848                 if (proto == IPPROTO_TCP)
849                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
850                 else if (proto == IPPROTO_UDP)
851                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
852         }
853
854         if (skb_vlan_tag_present(skb)) {
855                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
856                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
857         }
858
859         BE_WRB_F_SET(wrb_params->features, CRC, 1);
860 }
861
862 static void wrb_fill_hdr(struct be_adapter *adapter,
863                          struct be_eth_hdr_wrb *hdr,
864                          struct be_wrb_params *wrb_params,
865                          struct sk_buff *skb)
866 {
867         memset(hdr, 0, sizeof(*hdr));
868
869         SET_TX_WRB_HDR_BITS(crc, hdr,
870                             BE_WRB_F_GET(wrb_params->features, CRC));
871         SET_TX_WRB_HDR_BITS(ipcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, IPCS));
873         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, TCPCS));
875         SET_TX_WRB_HDR_BITS(udpcs, hdr,
876                             BE_WRB_F_GET(wrb_params->features, UDPCS));
877
878         SET_TX_WRB_HDR_BITS(lso, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO));
880         SET_TX_WRB_HDR_BITS(lso6, hdr,
881                             BE_WRB_F_GET(wrb_params->features, LSO6));
882         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
883
884         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
885          * hack is not needed, the evt bit is set while ringing DB.
886          */
887         SET_TX_WRB_HDR_BITS(event, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
889         SET_TX_WRB_HDR_BITS(vlan, hdr,
890                             BE_WRB_F_GET(wrb_params->features, VLAN));
891         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
892
893         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
894         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
895         SET_TX_WRB_HDR_BITS(mgmt, hdr,
896                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
897 }
898
899 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
900                           bool unmap_single)
901 {
902         dma_addr_t dma;
903         u32 frag_len = le32_to_cpu(wrb->frag_len);
904
905
906         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
907                 (u64)le32_to_cpu(wrb->frag_pa_lo);
908         if (frag_len) {
909                 if (unmap_single)
910                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
911                 else
912                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
913         }
914 }
915
916 /* Grab a WRB header for xmit */
917 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
918 {
919         u32 head = txo->q.head;
920
921         queue_head_inc(&txo->q);
922         return head;
923 }
924
925 /* Set up the WRB header for xmit */
926 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
927                                 struct be_tx_obj *txo,
928                                 struct be_wrb_params *wrb_params,
929                                 struct sk_buff *skb, u16 head)
930 {
931         u32 num_frags = skb_wrb_cnt(skb);
932         struct be_queue_info *txq = &txo->q;
933         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
934
935         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
936         be_dws_cpu_to_le(hdr, sizeof(*hdr));
937
938         BUG_ON(txo->sent_skb_list[head]);
939         txo->sent_skb_list[head] = skb;
940         txo->last_req_hdr = head;
941         atomic_add(num_frags, &txq->used);
942         txo->last_req_wrb_cnt = num_frags;
943         txo->pend_wrb_cnt += num_frags;
944 }
945
946 /* Setup a WRB fragment (buffer descriptor) for xmit */
947 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
948                                  int len)
949 {
950         struct be_eth_wrb *wrb;
951         struct be_queue_info *txq = &txo->q;
952
953         wrb = queue_head_node(txq);
954         wrb_fill(wrb, busaddr, len);
955         queue_head_inc(txq);
956 }
957
958 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
959  * was invoked. The producer index is restored to the previous packet and the
960  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
961  */
962 static void be_xmit_restore(struct be_adapter *adapter,
963                             struct be_tx_obj *txo, u32 head, bool map_single,
964                             u32 copied)
965 {
966         struct device *dev;
967         struct be_eth_wrb *wrb;
968         struct be_queue_info *txq = &txo->q;
969
970         dev = &adapter->pdev->dev;
971         txq->head = head;
972
973         /* skip the first wrb (hdr); it's not mapped */
974         queue_head_inc(txq);
975         while (copied) {
976                 wrb = queue_head_node(txq);
977                 unmap_tx_frag(dev, wrb, map_single);
978                 map_single = false;
979                 copied -= le32_to_cpu(wrb->frag_len);
980                 queue_head_inc(txq);
981         }
982
983         txq->head = head;
984 }
985
986 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
987  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
988  * of WRBs used up by the packet.
989  */
990 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
991                            struct sk_buff *skb,
992                            struct be_wrb_params *wrb_params)
993 {
994         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
995         struct device *dev = &adapter->pdev->dev;
996         bool map_single = false;
997         u32 head;
998         dma_addr_t busaddr;
999         int len;
1000
1001         head = be_tx_get_wrb_hdr(txo);
1002
1003         if (skb->len > skb->data_len) {
1004                 len = skb_headlen(skb);
1005
1006                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007                 if (dma_mapping_error(dev, busaddr))
1008                         goto dma_err;
1009                 map_single = true;
1010                 be_tx_setup_wrb_frag(txo, busaddr, len);
1011                 copied += len;
1012         }
1013
1014         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1016                 len = skb_frag_size(frag);
1017
1018                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019                 if (dma_mapping_error(dev, busaddr))
1020                         goto dma_err;
1021                 be_tx_setup_wrb_frag(txo, busaddr, len);
1022                 copied += len;
1023         }
1024
1025         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027         be_tx_stats_update(txo, skb);
1028         return wrb_cnt;
1029
1030 dma_err:
1031         adapter->drv_stats.dma_map_errors++;
1032         be_xmit_restore(adapter, txo, head, map_single, copied);
1033         return 0;
1034 }
1035
1036 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037 {
1038         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039 }
1040
1041 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042                                              struct sk_buff *skb,
1043                                              struct be_wrb_params
1044                                              *wrb_params)
1045 {
1046         bool insert_vlan = false;
1047         u16 vlan_tag = 0;
1048
1049         skb = skb_share_check(skb, GFP_ATOMIC);
1050         if (unlikely(!skb))
1051                 return skb;
1052
1053         if (skb_vlan_tag_present(skb)) {
1054                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1055                 insert_vlan = true;
1056         }
1057
1058         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1059                 if (!insert_vlan) {
1060                         vlan_tag = adapter->pvid;
1061                         insert_vlan = true;
1062                 }
1063                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1064                  * skip VLAN insertion
1065                  */
1066                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1067         }
1068
1069         if (insert_vlan) {
1070                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1071                                                 vlan_tag);
1072                 if (unlikely(!skb))
1073                         return skb;
1074                 __vlan_hwaccel_clear_tag(skb);
1075         }
1076
1077         /* Insert the outer VLAN, if any */
1078         if (adapter->qnq_vid) {
1079                 vlan_tag = adapter->qnq_vid;
1080                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1081                                                 vlan_tag);
1082                 if (unlikely(!skb))
1083                         return skb;
1084                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1085         }
1086
1087         return skb;
1088 }
1089
1090 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1091 {
1092         struct ethhdr *eh = (struct ethhdr *)skb->data;
1093         u16 offset = ETH_HLEN;
1094
1095         if (eh->h_proto == htons(ETH_P_IPV6)) {
1096                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1097
1098                 offset += sizeof(struct ipv6hdr);
1099                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1100                     ip6h->nexthdr != NEXTHDR_UDP) {
1101                         struct ipv6_opt_hdr *ehdr =
1102                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1103
1104                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1105                         if (ehdr->hdrlen == 0xff)
1106                                 return true;
1107                 }
1108         }
1109         return false;
1110 }
1111
1112 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113 {
1114         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1115 }
1116
1117 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1118 {
1119         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1120 }
1121
1122 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1123                                                   struct sk_buff *skb,
1124                                                   struct be_wrb_params
1125                                                   *wrb_params)
1126 {
1127         struct vlan_ethhdr *veh = skb_vlan_eth_hdr(skb);
1128         unsigned int eth_hdr_len;
1129         struct iphdr *ip;
1130
1131         /* For padded packets, BE HW modifies tot_len field in IP header
1132          * incorrecly when VLAN tag is inserted by HW.
1133          * For padded packets, Lancer computes incorrect checksum.
1134          */
1135         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1136                                                 VLAN_ETH_HLEN : ETH_HLEN;
1137         if (skb->len <= 60 &&
1138             (lancer_chip(adapter) || BE3_chip(adapter) ||
1139              skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1140                 ip = (struct iphdr *)ip_hdr(skb);
1141                 if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1142                         goto tx_drop;
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though it is disabled.
1154          * Manually insert VLAN in pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lockup when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32b or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36b.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there are odd set of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270 #define is_arp_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273 #define is_dhcp_client_filt_enabled(adapter)    \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276 #define is_dhcp_srvr_filt_enabled(adapter)      \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279 #define is_nbios_filt_enabled(adapter)  \
1280                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282 #define is_ipv6_na_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask &       \
1284                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286 #define is_ipv6_ra_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289 #define is_ipv6_ras_filt_enabled(adapter)       \
1290                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292 #define is_broadcast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295 #define is_multicast_filt_enabled(adapter)      \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
1298 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                                struct sk_buff **skb)
1300 {
1301         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302         bool os2bmc = false;
1303
1304         if (!be_is_os2bmc_enabled(adapter))
1305                 goto done;
1306
1307         if (!is_multicast_ether_addr(eh->h_dest))
1308                 goto done;
1309
1310         if (is_mc_allowed_on_bmc(adapter, eh) ||
1311             is_bc_allowed_on_bmc(adapter, eh) ||
1312             is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                 os2bmc = true;
1314                 goto done;
1315         }
1316
1317         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                 u8 nexthdr = hdr->nexthdr;
1320
1321                 if (nexthdr == IPPROTO_ICMPV6) {
1322                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                         switch (icmp6->icmp6_type) {
1325                         case NDISC_ROUTER_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                 goto done;
1328                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                 goto done;
1331                         default:
1332                                 break;
1333                         }
1334                 }
1335         }
1336
1337         if (is_udp_pkt((*skb))) {
1338                 struct udphdr *udp = udp_hdr((*skb));
1339
1340                 switch (ntohs(udp->dest)) {
1341                 case DHCP_CLIENT_PORT:
1342                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                         goto done;
1344                 case DHCP_SERVER_PORT:
1345                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                         goto done;
1347                 case NET_BIOS_PORT1:
1348                 case NET_BIOS_PORT2:
1349                         os2bmc = is_nbios_filt_enabled(adapter);
1350                         goto done;
1351                 case DHCPV6_RAS_PORT:
1352                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                         goto done;
1354                 default:
1355                         break;
1356                 }
1357         }
1358 done:
1359         /* For packets over a vlan, which are destined
1360          * to BMC, asic expects the vlan to be inline in the packet.
1361          */
1362         if (os2bmc)
1363                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365         return os2bmc;
1366 }
1367
1368 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369 {
1370         struct be_adapter *adapter = netdev_priv(netdev);
1371         u16 q_idx = skb_get_queue_mapping(skb);
1372         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373         struct be_wrb_params wrb_params = { 0 };
1374         bool flush = !netdev_xmit_more();
1375         u16 wrb_cnt;
1376
1377         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378         if (unlikely(!skb))
1379                 goto drop;
1380
1381         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384         if (unlikely(!wrb_cnt)) {
1385                 dev_kfree_skb_any(skb);
1386                 goto drop;
1387         }
1388
1389         /* if os2bmc is enabled and if the pkt is destined to bmc,
1390          * enqueue the pkt a 2nd time with mgmt bit set.
1391          */
1392         if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                 if (unlikely(!wrb_cnt))
1396                         goto drop;
1397                 else
1398                         skb_get(skb);
1399         }
1400
1401         if (be_is_txq_full(txo)) {
1402                 netif_stop_subqueue(netdev, q_idx);
1403                 tx_stats(txo)->tx_stops++;
1404         }
1405
1406         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                 be_xmit_flush(adapter, txo);
1408
1409         return NETDEV_TX_OK;
1410 drop:
1411         tx_stats(txo)->tx_drv_drops++;
1412         /* Flush the already enqueued tx requests */
1413         if (flush && txo->pend_wrb_cnt)
1414                 be_xmit_flush(adapter, txo);
1415
1416         return NETDEV_TX_OK;
1417 }
1418
1419 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420 {
1421         struct be_adapter *adapter = netdev_priv(netdev);
1422         struct device *dev = &adapter->pdev->dev;
1423         struct be_tx_obj *txo;
1424         struct sk_buff *skb;
1425         struct tcphdr *tcphdr;
1426         struct udphdr *udphdr;
1427         u32 *entry;
1428         int status;
1429         int i, j;
1430
1431         for_all_tx_queues(adapter, txo, i) {
1432                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                          i, txo->q.head, txo->q.tail,
1434                          atomic_read(&txo->q.used), txo->q.id);
1435
1436                 entry = txo->q.dma_mem.va;
1437                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                          j, entry[j], entry[j + 1],
1442                                          entry[j + 2], entry[j + 3]);
1443                         }
1444                 }
1445
1446                 entry = txo->cq.dma_mem.va;
1447                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                          i, txo->cq.head, txo->cq.tail,
1449                          atomic_read(&txo->cq.used));
1450                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                          j, entry[j], entry[j + 1],
1455                                          entry[j + 2], entry[j + 3]);
1456                         }
1457                 }
1458
1459                 for (j = 0; j < TX_Q_LEN; j++) {
1460                         if (txo->sent_skb_list[j]) {
1461                                 skb = txo->sent_skb_list[j];
1462                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                         tcphdr = tcp_hdr(skb);
1464                                         dev_info(dev, "TCP source port %d\n",
1465                                                  ntohs(tcphdr->source));
1466                                         dev_info(dev, "TCP dest port %d\n",
1467                                                  ntohs(tcphdr->dest));
1468                                         dev_info(dev, "TCP sequence num %d\n",
1469                                                  ntohs(tcphdr->seq));
1470                                         dev_info(dev, "TCP ack_seq %d\n",
1471                                                  ntohs(tcphdr->ack_seq));
1472                                 } else if (ip_hdr(skb)->protocol ==
1473                                            IPPROTO_UDP) {
1474                                         udphdr = udp_hdr(skb);
1475                                         dev_info(dev, "UDP source port %d\n",
1476                                                  ntohs(udphdr->source));
1477                                         dev_info(dev, "UDP dest port %d\n",
1478                                                  ntohs(udphdr->dest));
1479                                 }
1480                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                          j, skb, skb->len, skb->protocol);
1482                         }
1483                 }
1484         }
1485
1486         if (lancer_chip(adapter)) {
1487                 dev_info(dev, "Initiating reset due to tx timeout\n");
1488                 dev_info(dev, "Resetting adapter\n");
1489                 status = lancer_physdev_ctrl(adapter,
1490                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1491                 if (status)
1492                         dev_err(dev, "Reset failed .. Reboot server\n");
1493         }
1494 }
1495
1496 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497 {
1498         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1500 }
1501
1502 static int be_set_vlan_promisc(struct be_adapter *adapter)
1503 {
1504         struct device *dev = &adapter->pdev->dev;
1505         int status;
1506
1507         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                 return 0;
1509
1510         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511         if (!status) {
1512                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514         } else {
1515                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516         }
1517         return status;
1518 }
1519
1520 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521 {
1522         struct device *dev = &adapter->pdev->dev;
1523         int status;
1524
1525         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526         if (!status) {
1527                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529         }
1530         return status;
1531 }
1532
1533 /*
1534  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535  * If the user configures more, place BE in vlan promiscuous mode.
1536  */
1537 static int be_vid_config(struct be_adapter *adapter)
1538 {
1539         struct device *dev = &adapter->pdev->dev;
1540         u16 vids[BE_NUM_VLANS_SUPPORTED];
1541         u16 num = 0, i = 0;
1542         int status = 0;
1543
1544         /* No need to change the VLAN state if the I/F is in promiscuous */
1545         if (adapter->netdev->flags & IFF_PROMISC)
1546                 return 0;
1547
1548         if (adapter->vlans_added > be_max_vlans(adapter))
1549                 return be_set_vlan_promisc(adapter);
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                 status = be_clear_vlan_promisc(adapter);
1553                 if (status)
1554                         return status;
1555         }
1556         /* Construct VLAN Table to give to HW */
1557         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                 vids[num++] = cpu_to_le16(i);
1559
1560         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561         if (status) {
1562                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                     addl_status(status) ==
1566                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                         return be_set_vlan_promisc(adapter);
1568         }
1569         return status;
1570 }
1571
1572 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573 {
1574         struct be_adapter *adapter = netdev_priv(netdev);
1575         int status = 0;
1576
1577         mutex_lock(&adapter->rx_filter_lock);
1578
1579         /* Packets with VID 0 are always received by Lancer by default */
1580         if (lancer_chip(adapter) && vid == 0)
1581                 goto done;
1582
1583         if (test_bit(vid, adapter->vids))
1584                 goto done;
1585
1586         set_bit(vid, adapter->vids);
1587         adapter->vlans_added++;
1588
1589         status = be_vid_config(adapter);
1590 done:
1591         mutex_unlock(&adapter->rx_filter_lock);
1592         return status;
1593 }
1594
1595 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598         int status = 0;
1599
1600         mutex_lock(&adapter->rx_filter_lock);
1601
1602         /* Packets with VID 0 are always received by Lancer by default */
1603         if (lancer_chip(adapter) && vid == 0)
1604                 goto done;
1605
1606         if (!test_bit(vid, adapter->vids))
1607                 goto done;
1608
1609         clear_bit(vid, adapter->vids);
1610         adapter->vlans_added--;
1611
1612         status = be_vid_config(adapter);
1613 done:
1614         mutex_unlock(&adapter->rx_filter_lock);
1615         return status;
1616 }
1617
1618 static void be_set_all_promisc(struct be_adapter *adapter)
1619 {
1620         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622 }
1623
1624 static void be_set_mc_promisc(struct be_adapter *adapter)
1625 {
1626         int status;
1627
1628         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                 return;
1630
1631         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632         if (!status)
1633                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634 }
1635
1636 static void be_set_uc_promisc(struct be_adapter *adapter)
1637 {
1638         int status;
1639
1640         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                 return;
1642
1643         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644         if (!status)
1645                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646 }
1647
1648 static void be_clear_uc_promisc(struct be_adapter *adapter)
1649 {
1650         int status;
1651
1652         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                 return;
1654
1655         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656         if (!status)
1657                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658 }
1659
1660 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1661  * We use a single callback function for both sync and unsync. We really don't
1662  * add/remove addresses through this callback. But, we use it to detect changes
1663  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1664  */
1665 static int be_uc_list_update(struct net_device *netdev,
1666                              const unsigned char *addr)
1667 {
1668         struct be_adapter *adapter = netdev_priv(netdev);
1669
1670         adapter->update_uc_list = true;
1671         return 0;
1672 }
1673
1674 static int be_mc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_mc_list = true;
1680         return 0;
1681 }
1682
1683 static void be_set_mc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool mc_promisc = false;
1688         int status;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_mc_list = false;
1695         } else if (netdev->flags & IFF_ALLMULTI ||
1696                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                 /* Enable multicast promisc if num configured exceeds
1698                  * what we support
1699                  */
1700                 mc_promisc = true;
1701                 adapter->update_mc_list = false;
1702         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                 /* Update mc-list unconditionally if the iface was previously
1704                  * in mc-promisc mode and now is out of that mode.
1705                  */
1706                 adapter->update_mc_list = true;
1707         }
1708
1709         if (adapter->update_mc_list) {
1710                 int i = 0;
1711
1712                 /* cache the mc-list in adapter */
1713                 netdev_for_each_mc_addr(ha, netdev) {
1714                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                         i++;
1716                 }
1717                 adapter->mc_count = netdev_mc_count(netdev);
1718         }
1719         netif_addr_unlock_bh(netdev);
1720
1721         if (mc_promisc) {
1722                 be_set_mc_promisc(adapter);
1723         } else if (adapter->update_mc_list) {
1724                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                 if (!status)
1726                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                 else
1728                         be_set_mc_promisc(adapter);
1729
1730                 adapter->update_mc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_mc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         __dev_mc_unsync(netdev, NULL);
1739         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740         adapter->mc_count = 0;
1741 }
1742
1743 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744 {
1745         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                 return 0;
1748         }
1749
1750         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                                adapter->if_handle,
1752                                &adapter->pmac_id[uc_idx + 1], 0);
1753 }
1754
1755 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756 {
1757         if (pmac_id == adapter->pmac_id[0])
1758                 return;
1759
1760         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761 }
1762
1763 static void be_set_uc_list(struct be_adapter *adapter)
1764 {
1765         struct net_device *netdev = adapter->netdev;
1766         struct netdev_hw_addr *ha;
1767         bool uc_promisc = false;
1768         int curr_uc_macs = 0, i;
1769
1770         netif_addr_lock_bh(netdev);
1771         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773         if (netdev->flags & IFF_PROMISC) {
1774                 adapter->update_uc_list = false;
1775         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                 uc_promisc = true;
1777                 adapter->update_uc_list = false;
1778         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                 /* Update uc-list unconditionally if the iface was previously
1780                  * in uc-promisc mode and now is out of that mode.
1781                  */
1782                 adapter->update_uc_list = true;
1783         }
1784
1785         if (adapter->update_uc_list) {
1786                 /* cache the uc-list in adapter array */
1787                 i = 0;
1788                 netdev_for_each_uc_addr(ha, netdev) {
1789                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                         i++;
1791                 }
1792                 curr_uc_macs = netdev_uc_count(netdev);
1793         }
1794         netif_addr_unlock_bh(netdev);
1795
1796         if (uc_promisc) {
1797                 be_set_uc_promisc(adapter);
1798         } else if (adapter->update_uc_list) {
1799                 be_clear_uc_promisc(adapter);
1800
1801                 for (i = 0; i < adapter->uc_macs; i++)
1802                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                 for (i = 0; i < curr_uc_macs; i++)
1805                         be_uc_mac_add(adapter, i);
1806                 adapter->uc_macs = curr_uc_macs;
1807                 adapter->update_uc_list = false;
1808         }
1809 }
1810
1811 static void be_clear_uc_list(struct be_adapter *adapter)
1812 {
1813         struct net_device *netdev = adapter->netdev;
1814         int i;
1815
1816         __dev_uc_unsync(netdev, NULL);
1817         for (i = 0; i < adapter->uc_macs; i++)
1818                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820         adapter->uc_macs = 0;
1821 }
1822
1823 static void __be_set_rx_mode(struct be_adapter *adapter)
1824 {
1825         struct net_device *netdev = adapter->netdev;
1826
1827         mutex_lock(&adapter->rx_filter_lock);
1828
1829         if (netdev->flags & IFF_PROMISC) {
1830                 if (!be_in_all_promisc(adapter))
1831                         be_set_all_promisc(adapter);
1832         } else if (be_in_all_promisc(adapter)) {
1833                 /* We need to re-program the vlan-list or clear
1834                  * vlan-promisc mode (if needed) when the interface
1835                  * comes out of promisc mode.
1836                  */
1837                 be_vid_config(adapter);
1838         }
1839
1840         be_set_uc_list(adapter);
1841         be_set_mc_list(adapter);
1842
1843         mutex_unlock(&adapter->rx_filter_lock);
1844 }
1845
1846 static void be_work_set_rx_mode(struct work_struct *work)
1847 {
1848         struct be_cmd_work *cmd_work =
1849                                 container_of(work, struct be_cmd_work, work);
1850
1851         __be_set_rx_mode(cmd_work->adapter);
1852         kfree(cmd_work);
1853 }
1854
1855 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856 {
1857         struct be_adapter *adapter = netdev_priv(netdev);
1858         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859         int status;
1860
1861         if (!sriov_enabled(adapter))
1862                 return -EPERM;
1863
1864         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                 return -EINVAL;
1866
1867         /* Proceed further only if user provided MAC is different
1868          * from active MAC
1869          */
1870         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                 return 0;
1872
1873         if (BEx_chip(adapter)) {
1874                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                 vf + 1);
1876
1877                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                          &vf_cfg->pmac_id, vf + 1);
1879         } else {
1880                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                         vf + 1);
1882         }
1883
1884         if (status) {
1885                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                         mac, vf, status);
1887                 return be_cmd_status(status);
1888         }
1889
1890         ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892         return 0;
1893 }
1894
1895 static int be_get_vf_config(struct net_device *netdev, int vf,
1896                             struct ifla_vf_info *vi)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs)
1905                 return -EINVAL;
1906
1907         vi->vf = vf;
1908         vi->max_tx_rate = vf_cfg->tx_rate;
1909         vi->min_tx_rate = 0;
1910         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916         return 0;
1917 }
1918
1919 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920 {
1921         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922         u16 vids[BE_NUM_VLANS_SUPPORTED];
1923         int vf_if_id = vf_cfg->if_handle;
1924         int status;
1925
1926         /* Enable Transparent VLAN Tagging */
1927         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928         if (status)
1929                 return status;
1930
1931         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1932         vids[0] = 0;
1933         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934         if (!status)
1935                 dev_info(&adapter->pdev->dev,
1936                          "Cleared guest VLANs on VF%d", vf);
1937
1938         /* After TVT is enabled, disallow VFs to program VLAN filters */
1939         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1942                 if (!status)
1943                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944         }
1945         return 0;
1946 }
1947
1948 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949 {
1950         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951         struct device *dev = &adapter->pdev->dev;
1952         int status;
1953
1954         /* Reset Transparent VLAN Tagging. */
1955         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                        vf_cfg->if_handle, 0, 0);
1957         if (status)
1958                 return status;
1959
1960         /* Allow VFs to program VLAN filtering */
1961         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                   BE_PRIV_FILTMGMT, vf + 1);
1964                 if (!status) {
1965                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                 }
1968         }
1969
1970         dev_info(dev,
1971                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972         return 0;
1973 }
1974
1975 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                           __be16 vlan_proto)
1977 {
1978         struct be_adapter *adapter = netdev_priv(netdev);
1979         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980         int status;
1981
1982         if (!sriov_enabled(adapter))
1983                 return -EPERM;
1984
1985         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                 return -EINVAL;
1987
1988         if (vlan_proto != htons(ETH_P_8021Q))
1989                 return -EPROTONOSUPPORT;
1990
1991         if (vlan || qos) {
1992                 vlan |= qos << VLAN_PRIO_SHIFT;
1993                 status = be_set_vf_tvt(adapter, vf, vlan);
1994         } else {
1995                 status = be_clear_vf_tvt(adapter, vf);
1996         }
1997
1998         if (status) {
1999                 dev_err(&adapter->pdev->dev,
2000                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                         status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         vf_cfg->vlan_tag = vlan;
2006         return 0;
2007 }
2008
2009 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                              int min_tx_rate, int max_tx_rate)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct device *dev = &adapter->pdev->dev;
2014         int percent_rate, status = 0;
2015         u16 link_speed = 0;
2016         u8 link_status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (min_tx_rate)
2025                 return -EINVAL;
2026
2027         if (!max_tx_rate)
2028                 goto config_qos;
2029
2030         status = be_cmd_link_status_query(adapter, &link_speed,
2031                                           &link_status, 0);
2032         if (status)
2033                 goto err;
2034
2035         if (!link_status) {
2036                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                 status = -ENETDOWN;
2038                 goto err;
2039         }
2040
2041         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                         link_speed);
2044                 status = -EINVAL;
2045                 goto err;
2046         }
2047
2048         /* On Skyhawk the QOS setting must be done only as a % value */
2049         percent_rate = link_speed / 100;
2050         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                         percent_rate);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057 config_qos:
2058         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059         if (status)
2060                 goto err;
2061
2062         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063         return 0;
2064
2065 err:
2066         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                 max_tx_rate, vf);
2068         return be_cmd_status(status);
2069 }
2070
2071 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                 int link_state)
2073 {
2074         struct be_adapter *adapter = netdev_priv(netdev);
2075         int status;
2076
2077         if (!sriov_enabled(adapter))
2078                 return -EPERM;
2079
2080         if (vf >= adapter->num_vfs)
2081                 return -EINVAL;
2082
2083         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2084         if (status) {
2085                 dev_err(&adapter->pdev->dev,
2086                         "Link state change on VF %d failed: %#x\n", vf, status);
2087                 return be_cmd_status(status);
2088         }
2089
2090         adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092         return 0;
2093 }
2094
2095 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096 {
2097         struct be_adapter *adapter = netdev_priv(netdev);
2098         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099         u8 spoofchk;
2100         int status;
2101
2102         if (!sriov_enabled(adapter))
2103                 return -EPERM;
2104
2105         if (vf >= adapter->num_vfs)
2106                 return -EINVAL;
2107
2108         if (BEx_chip(adapter))
2109                 return -EOPNOTSUPP;
2110
2111         if (enable == vf_cfg->spoofchk)
2112                 return 0;
2113
2114         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                        0, spoofchk);
2118         if (status) {
2119                 dev_err(&adapter->pdev->dev,
2120                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                 return be_cmd_status(status);
2122         }
2123
2124         vf_cfg->spoofchk = enable;
2125         return 0;
2126 }
2127
2128 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                           ulong now)
2130 {
2131         aic->rx_pkts_prev = rx_pkts;
2132         aic->tx_reqs_prev = tx_pkts;
2133         aic->jiffies = now;
2134 }
2135
2136 static int be_get_new_eqd(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         int eqd, start;
2140         struct be_aic_obj *aic;
2141         struct be_rx_obj *rxo;
2142         struct be_tx_obj *txo;
2143         u64 rx_pkts = 0, tx_pkts = 0;
2144         ulong now;
2145         u32 pps, delta;
2146         int i;
2147
2148         aic = &adapter->aic_obj[eqo->idx];
2149         if (!adapter->aic_enabled) {
2150                 if (aic->jiffies)
2151                         aic->jiffies = 0;
2152                 eqd = aic->et_eqd;
2153                 return eqd;
2154         }
2155
2156         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                 do {
2158                         start = u64_stats_fetch_begin(&rxo->stats.sync);
2159                         rx_pkts += rxo->stats.rx_pkts;
2160                 } while (u64_stats_fetch_retry(&rxo->stats.sync, start));
2161         }
2162
2163         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                 do {
2165                         start = u64_stats_fetch_begin(&txo->stats.sync);
2166                         tx_pkts += txo->stats.tx_reqs;
2167                 } while (u64_stats_fetch_retry(&txo->stats.sync, start));
2168         }
2169
2170         /* Skip, if wrapped around or first calculation */
2171         now = jiffies;
2172         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173             rx_pkts < aic->rx_pkts_prev ||
2174             tx_pkts < aic->tx_reqs_prev) {
2175                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                 return aic->prev_eqd;
2177         }
2178
2179         delta = jiffies_to_msecs(now - aic->jiffies);
2180         if (delta == 0)
2181                 return aic->prev_eqd;
2182
2183         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185         eqd = (pps / 15000) << 2;
2186
2187         if (eqd < 8)
2188                 eqd = 0;
2189         eqd = min_t(u32, eqd, aic->max_eqd);
2190         eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192         be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194         return eqd;
2195 }
2196
2197 /* For Skyhawk-R only */
2198 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199 {
2200         struct be_adapter *adapter = eqo->adapter;
2201         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202         ulong now = jiffies;
2203         int eqd;
2204         u32 mult_enc;
2205
2206         if (!adapter->aic_enabled)
2207                 return 0;
2208
2209         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                 eqd = aic->prev_eqd;
2211         else
2212                 eqd = be_get_new_eqd(eqo);
2213
2214         if (eqd > 100)
2215                 mult_enc = R2I_DLY_ENC_1;
2216         else if (eqd > 60)
2217                 mult_enc = R2I_DLY_ENC_2;
2218         else if (eqd > 20)
2219                 mult_enc = R2I_DLY_ENC_3;
2220         else
2221                 mult_enc = R2I_DLY_ENC_0;
2222
2223         aic->prev_eqd = eqd;
2224
2225         return mult_enc;
2226 }
2227
2228 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229 {
2230         struct be_set_eqd set_eqd[MAX_EVT_QS];
2231         struct be_aic_obj *aic;
2232         struct be_eq_obj *eqo;
2233         int i, num = 0, eqd;
2234
2235         for_all_evt_queues(adapter, eqo, i) {
2236                 aic = &adapter->aic_obj[eqo->idx];
2237                 eqd = be_get_new_eqd(eqo);
2238                 if (force_update || eqd != aic->prev_eqd) {
2239                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                         set_eqd[num].eq_id = eqo->q.id;
2241                         aic->prev_eqd = eqd;
2242                         num++;
2243                 }
2244         }
2245
2246         if (num)
2247                 be_cmd_modify_eqd(adapter, set_eqd, num);
2248 }
2249
2250 static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                                struct be_rx_compl_info *rxcp)
2252 {
2253         struct be_rx_stats *stats = rx_stats(rxo);
2254
2255         u64_stats_update_begin(&stats->sync);
2256         stats->rx_compl++;
2257         stats->rx_bytes += rxcp->pkt_size;
2258         stats->rx_pkts++;
2259         if (rxcp->tunneled)
2260                 stats->rx_vxlan_offload_pkts++;
2261         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                 stats->rx_mcast_pkts++;
2263         if (rxcp->err)
2264                 stats->rx_compl_err++;
2265         u64_stats_update_end(&stats->sync);
2266 }
2267
2268 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269 {
2270         /* L4 checksum is not reliable for non TCP/UDP packets.
2271          * Also ignore ipcksm for ipv6 pkts
2272          */
2273         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275 }
2276
2277 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278 {
2279         struct be_adapter *adapter = rxo->adapter;
2280         struct be_rx_page_info *rx_page_info;
2281         struct be_queue_info *rxq = &rxo->q;
2282         u32 frag_idx = rxq->tail;
2283
2284         rx_page_info = &rxo->page_info_tbl[frag_idx];
2285         BUG_ON(!rx_page_info->page);
2286
2287         if (rx_page_info->last_frag) {
2288                 dma_unmap_page(&adapter->pdev->dev,
2289                                dma_unmap_addr(rx_page_info, bus),
2290                                adapter->big_page_size, DMA_FROM_DEVICE);
2291                 rx_page_info->last_frag = false;
2292         } else {
2293                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                         dma_unmap_addr(rx_page_info, bus),
2295                                         rx_frag_size, DMA_FROM_DEVICE);
2296         }
2297
2298         queue_tail_inc(rxq);
2299         atomic_dec(&rxq->used);
2300         return rx_page_info;
2301 }
2302
2303 /* Throwaway the data in the Rx completion */
2304 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_rx_page_info *page_info;
2308         u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310         for (i = 0; i < num_rcvd; i++) {
2311                 page_info = get_rx_page_info(rxo);
2312                 put_page(page_info->page);
2313                 memset(page_info, 0, sizeof(*page_info));
2314         }
2315 }
2316
2317 /*
2318  * skb_fill_rx_data forms a complete skb for an ether frame
2319  * indicated by rxcp.
2320  */
2321 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                              struct be_rx_compl_info *rxcp)
2323 {
2324         struct be_rx_page_info *page_info;
2325         u16 i, j;
2326         u16 hdr_len, curr_frag_len, remaining;
2327         u8 *start;
2328
2329         page_info = get_rx_page_info(rxo);
2330         start = page_address(page_info->page) + page_info->page_offset;
2331         prefetch(start);
2332
2333         /* Copy data in the first descriptor of this completion */
2334         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336         skb->len = curr_frag_len;
2337         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                 memcpy(skb->data, start, curr_frag_len);
2339                 /* Complete packet has now been moved to data */
2340                 put_page(page_info->page);
2341                 skb->data_len = 0;
2342                 skb->tail += curr_frag_len;
2343         } else {
2344                 hdr_len = ETH_HLEN;
2345                 memcpy(skb->data, start, hdr_len);
2346                 skb_shinfo(skb)->nr_frags = 1;
2347                 skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
2348                                         page_info->page,
2349                                         page_info->page_offset + hdr_len,
2350                                         curr_frag_len - hdr_len);
2351                 skb->data_len = curr_frag_len - hdr_len;
2352                 skb->truesize += rx_frag_size;
2353                 skb->tail += hdr_len;
2354         }
2355         page_info->page = NULL;
2356
2357         if (rxcp->pkt_size <= rx_frag_size) {
2358                 BUG_ON(rxcp->num_rcvd != 1);
2359                 return;
2360         }
2361
2362         /* More frags present for this completion */
2363         remaining = rxcp->pkt_size - curr_frag_len;
2364         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2365                 page_info = get_rx_page_info(rxo);
2366                 curr_frag_len = min(remaining, rx_frag_size);
2367
2368                 /* Coalesce all frags from the same physical page in one slot */
2369                 if (page_info->page_offset == 0) {
2370                         /* Fresh page */
2371                         j++;
2372                         skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2373                                                 page_info->page,
2374                                                 page_info->page_offset,
2375                                                 curr_frag_len);
2376                         skb_shinfo(skb)->nr_frags++;
2377                 } else {
2378                         put_page(page_info->page);
2379                         skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2380                                           curr_frag_len);
2381                 }
2382
2383                 skb->len += curr_frag_len;
2384                 skb->data_len += curr_frag_len;
2385                 skb->truesize += rx_frag_size;
2386                 remaining -= curr_frag_len;
2387                 page_info->page = NULL;
2388         }
2389         BUG_ON(j > MAX_SKB_FRAGS);
2390 }
2391
2392 /* Process the RX completion indicated by rxcp when GRO is disabled */
2393 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                 struct be_rx_compl_info *rxcp)
2395 {
2396         struct be_adapter *adapter = rxo->adapter;
2397         struct net_device *netdev = adapter->netdev;
2398         struct sk_buff *skb;
2399
2400         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401         if (unlikely(!skb)) {
2402                 rx_stats(rxo)->rx_drops_no_skbs++;
2403                 be_rx_compl_discard(rxo, rxcp);
2404                 return;
2405         }
2406
2407         skb_fill_rx_data(rxo, skb, rxcp);
2408
2409         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         else
2412                 skb_checksum_none_assert(skb);
2413
2414         skb->protocol = eth_type_trans(skb, netdev);
2415         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416         if (netdev->features & NETIF_F_RXHASH)
2417                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419         skb->csum_level = rxcp->tunneled;
2420         skb_mark_napi_id(skb, napi);
2421
2422         if (rxcp->vlanf)
2423                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425         netif_receive_skb(skb);
2426 }
2427
2428 /* Process the RX completion indicated by rxcp when GRO is enabled */
2429 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                     struct napi_struct *napi,
2431                                     struct be_rx_compl_info *rxcp)
2432 {
2433         struct be_adapter *adapter = rxo->adapter;
2434         struct be_rx_page_info *page_info;
2435         struct sk_buff *skb = NULL;
2436         u16 remaining, curr_frag_len;
2437         u16 i, j;
2438
2439         skb = napi_get_frags(napi);
2440         if (!skb) {
2441                 be_rx_compl_discard(rxo, rxcp);
2442                 return;
2443         }
2444
2445         remaining = rxcp->pkt_size;
2446         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                 page_info = get_rx_page_info(rxo);
2448
2449                 curr_frag_len = min(remaining, rx_frag_size);
2450
2451                 /* Coalesce all frags from the same physical page in one slot */
2452                 if (i == 0 || page_info->page_offset == 0) {
2453                         /* First frag or Fresh page */
2454                         j++;
2455                         skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
2456                                                 page_info->page,
2457                                                 page_info->page_offset,
2458                                                 curr_frag_len);
2459                 } else {
2460                         put_page(page_info->page);
2461                         skb_frag_size_add(&skb_shinfo(skb)->frags[j],
2462                                           curr_frag_len);
2463                 }
2464
2465                 skb->truesize += rx_frag_size;
2466                 remaining -= curr_frag_len;
2467                 memset(page_info, 0, sizeof(*page_info));
2468         }
2469         BUG_ON(j > MAX_SKB_FRAGS);
2470
2471         skb_shinfo(skb)->nr_frags = j + 1;
2472         skb->len = rxcp->pkt_size;
2473         skb->data_len = rxcp->pkt_size;
2474         skb->ip_summed = CHECKSUM_UNNECESSARY;
2475         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2476         if (adapter->netdev->features & NETIF_F_RXHASH)
2477                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2478
2479         skb->csum_level = rxcp->tunneled;
2480
2481         if (rxcp->vlanf)
2482                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2483
2484         napi_gro_frags(napi);
2485 }
2486
2487 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2488                                  struct be_rx_compl_info *rxcp)
2489 {
2490         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2491         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2492         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2493         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2494         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2495         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2496         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2497         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2498         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2499         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2500         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2501         if (rxcp->vlanf) {
2502                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2503                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2504         }
2505         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2506         rxcp->tunneled =
2507                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2508 }
2509
2510 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2511                                  struct be_rx_compl_info *rxcp)
2512 {
2513         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2514         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2515         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2516         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2517         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2518         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2519         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2520         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2521         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2522         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2523         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2524         if (rxcp->vlanf) {
2525                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2526                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2527         }
2528         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2529         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2530 }
2531
2532 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2533 {
2534         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2535         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2536         struct be_adapter *adapter = rxo->adapter;
2537
2538         /* For checking the valid bit it is Ok to use either definition as the
2539          * valid bit is at the same position in both v0 and v1 Rx compl */
2540         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2541                 return NULL;
2542
2543         rmb();
2544         be_dws_le_to_cpu(compl, sizeof(*compl));
2545
2546         if (adapter->be3_native)
2547                 be_parse_rx_compl_v1(compl, rxcp);
2548         else
2549                 be_parse_rx_compl_v0(compl, rxcp);
2550
2551         if (rxcp->ip_frag)
2552                 rxcp->l4_csum = 0;
2553
2554         if (rxcp->vlanf) {
2555                 /* In QNQ modes, if qnq bit is not set, then the packet was
2556                  * tagged only with the transparent outer vlan-tag and must
2557                  * not be treated as a vlan packet by host
2558                  */
2559                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2560                         rxcp->vlanf = 0;
2561
2562                 if (!lancer_chip(adapter))
2563                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2564
2565                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2566                     !test_bit(rxcp->vlan_tag, adapter->vids))
2567                         rxcp->vlanf = 0;
2568         }
2569
2570         /* As the compl has been parsed, reset it; we wont touch it again */
2571         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2572
2573         queue_tail_inc(&rxo->cq);
2574         return rxcp;
2575 }
2576
2577 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2578 {
2579         u32 order = get_order(size);
2580
2581         if (order > 0)
2582                 gfp |= __GFP_COMP;
2583         return  alloc_pages(gfp, order);
2584 }
2585
2586 /*
2587  * Allocate a page, split it to fragments of size rx_frag_size and post as
2588  * receive buffers to BE
2589  */
2590 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2591 {
2592         struct be_adapter *adapter = rxo->adapter;
2593         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2594         struct be_queue_info *rxq = &rxo->q;
2595         struct page *pagep = NULL;
2596         struct device *dev = &adapter->pdev->dev;
2597         struct be_eth_rx_d *rxd;
2598         u64 page_dmaaddr = 0, frag_dmaaddr;
2599         u32 posted, page_offset = 0, notify = 0;
2600
2601         page_info = &rxo->page_info_tbl[rxq->head];
2602         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2603                 if (!pagep) {
2604                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2605                         if (unlikely(!pagep)) {
2606                                 rx_stats(rxo)->rx_post_fail++;
2607                                 break;
2608                         }
2609                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2610                                                     adapter->big_page_size,
2611                                                     DMA_FROM_DEVICE);
2612                         if (dma_mapping_error(dev, page_dmaaddr)) {
2613                                 put_page(pagep);
2614                                 pagep = NULL;
2615                                 adapter->drv_stats.dma_map_errors++;
2616                                 break;
2617                         }
2618                         page_offset = 0;
2619                 } else {
2620                         get_page(pagep);
2621                         page_offset += rx_frag_size;
2622                 }
2623                 page_info->page_offset = page_offset;
2624                 page_info->page = pagep;
2625
2626                 rxd = queue_head_node(rxq);
2627                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2628                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2629                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2630
2631                 /* Any space left in the current big page for another frag? */
2632                 if ((page_offset + rx_frag_size + rx_frag_size) >
2633                                         adapter->big_page_size) {
2634                         pagep = NULL;
2635                         page_info->last_frag = true;
2636                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2637                 } else {
2638                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2639                 }
2640
2641                 prev_page_info = page_info;
2642                 queue_head_inc(rxq);
2643                 page_info = &rxo->page_info_tbl[rxq->head];
2644         }
2645
2646         /* Mark the last frag of a page when we break out of the above loop
2647          * with no more slots available in the RXQ
2648          */
2649         if (pagep) {
2650                 prev_page_info->last_frag = true;
2651                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2652         }
2653
2654         if (posted) {
2655                 atomic_add(posted, &rxq->used);
2656                 if (rxo->rx_post_starved)
2657                         rxo->rx_post_starved = false;
2658                 do {
2659                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2660                         be_rxq_notify(adapter, rxq->id, notify);
2661                         posted -= notify;
2662                 } while (posted);
2663         } else if (atomic_read(&rxq->used) == 0) {
2664                 /* Let be_worker replenish when memory is available */
2665                 rxo->rx_post_starved = true;
2666         }
2667 }
2668
2669 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2670 {
2671         switch (status) {
2672         case BE_TX_COMP_HDR_PARSE_ERR:
2673                 tx_stats(txo)->tx_hdr_parse_err++;
2674                 break;
2675         case BE_TX_COMP_NDMA_ERR:
2676                 tx_stats(txo)->tx_dma_err++;
2677                 break;
2678         case BE_TX_COMP_ACL_ERR:
2679                 tx_stats(txo)->tx_spoof_check_err++;
2680                 break;
2681         }
2682 }
2683
2684 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2685 {
2686         switch (status) {
2687         case LANCER_TX_COMP_LSO_ERR:
2688                 tx_stats(txo)->tx_tso_err++;
2689                 break;
2690         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2691         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2692                 tx_stats(txo)->tx_spoof_check_err++;
2693                 break;
2694         case LANCER_TX_COMP_QINQ_ERR:
2695                 tx_stats(txo)->tx_qinq_err++;
2696                 break;
2697         case LANCER_TX_COMP_PARITY_ERR:
2698                 tx_stats(txo)->tx_internal_parity_err++;
2699                 break;
2700         case LANCER_TX_COMP_DMA_ERR:
2701                 tx_stats(txo)->tx_dma_err++;
2702                 break;
2703         case LANCER_TX_COMP_SGE_ERR:
2704                 tx_stats(txo)->tx_sge_err++;
2705                 break;
2706         }
2707 }
2708
2709 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2710                                                 struct be_tx_obj *txo)
2711 {
2712         struct be_queue_info *tx_cq = &txo->cq;
2713         struct be_tx_compl_info *txcp = &txo->txcp;
2714         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2715
2716         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2717                 return NULL;
2718
2719         /* Ensure load ordering of valid bit dword and other dwords below */
2720         rmb();
2721         be_dws_le_to_cpu(compl, sizeof(*compl));
2722
2723         txcp->status = GET_TX_COMPL_BITS(status, compl);
2724         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2725
2726         if (txcp->status) {
2727                 if (lancer_chip(adapter)) {
2728                         lancer_update_tx_err(txo, txcp->status);
2729                         /* Reset the adapter incase of TSO,
2730                          * SGE or Parity error
2731                          */
2732                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2733                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2734                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2735                                 be_set_error(adapter, BE_ERROR_TX);
2736                 } else {
2737                         be_update_tx_err(txo, txcp->status);
2738                 }
2739         }
2740
2741         if (be_check_error(adapter, BE_ERROR_TX))
2742                 return NULL;
2743
2744         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2745         queue_tail_inc(tx_cq);
2746         return txcp;
2747 }
2748
2749 static u16 be_tx_compl_process(struct be_adapter *adapter,
2750                                struct be_tx_obj *txo, u16 last_index)
2751 {
2752         struct sk_buff **sent_skbs = txo->sent_skb_list;
2753         struct be_queue_info *txq = &txo->q;
2754         struct sk_buff *skb = NULL;
2755         bool unmap_skb_hdr = false;
2756         struct be_eth_wrb *wrb;
2757         u16 num_wrbs = 0;
2758         u32 frag_index;
2759
2760         do {
2761                 if (sent_skbs[txq->tail]) {
2762                         /* Free skb from prev req */
2763                         if (skb)
2764                                 dev_consume_skb_any(skb);
2765                         skb = sent_skbs[txq->tail];
2766                         sent_skbs[txq->tail] = NULL;
2767                         queue_tail_inc(txq);  /* skip hdr wrb */
2768                         num_wrbs++;
2769                         unmap_skb_hdr = true;
2770                 }
2771                 wrb = queue_tail_node(txq);
2772                 frag_index = txq->tail;
2773                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2774                               (unmap_skb_hdr && skb_headlen(skb)));
2775                 unmap_skb_hdr = false;
2776                 queue_tail_inc(txq);
2777                 num_wrbs++;
2778         } while (frag_index != last_index);
2779         dev_consume_skb_any(skb);
2780
2781         return num_wrbs;
2782 }
2783
2784 /* Return the number of events in the event queue */
2785 static inline int events_get(struct be_eq_obj *eqo)
2786 {
2787         struct be_eq_entry *eqe;
2788         int num = 0;
2789
2790         do {
2791                 eqe = queue_tail_node(&eqo->q);
2792                 if (eqe->evt == 0)
2793                         break;
2794
2795                 rmb();
2796                 eqe->evt = 0;
2797                 num++;
2798                 queue_tail_inc(&eqo->q);
2799         } while (true);
2800
2801         return num;
2802 }
2803
2804 /* Leaves the EQ is disarmed state */
2805 static void be_eq_clean(struct be_eq_obj *eqo)
2806 {
2807         int num = events_get(eqo);
2808
2809         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2810 }
2811
2812 /* Free posted rx buffers that were not used */
2813 static void be_rxq_clean(struct be_rx_obj *rxo)
2814 {
2815         struct be_queue_info *rxq = &rxo->q;
2816         struct be_rx_page_info *page_info;
2817
2818         while (atomic_read(&rxq->used) > 0) {
2819                 page_info = get_rx_page_info(rxo);
2820                 put_page(page_info->page);
2821                 memset(page_info, 0, sizeof(*page_info));
2822         }
2823         BUG_ON(atomic_read(&rxq->used));
2824         rxq->tail = 0;
2825         rxq->head = 0;
2826 }
2827
2828 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2829 {
2830         struct be_queue_info *rx_cq = &rxo->cq;
2831         struct be_rx_compl_info *rxcp;
2832         struct be_adapter *adapter = rxo->adapter;
2833         int flush_wait = 0;
2834
2835         /* Consume pending rx completions.
2836          * Wait for the flush completion (identified by zero num_rcvd)
2837          * to arrive. Notify CQ even when there are no more CQ entries
2838          * for HW to flush partially coalesced CQ entries.
2839          * In Lancer, there is no need to wait for flush compl.
2840          */
2841         for (;;) {
2842                 rxcp = be_rx_compl_get(rxo);
2843                 if (!rxcp) {
2844                         if (lancer_chip(adapter))
2845                                 break;
2846
2847                         if (flush_wait++ > 50 ||
2848                             be_check_error(adapter,
2849                                            BE_ERROR_HW)) {
2850                                 dev_warn(&adapter->pdev->dev,
2851                                          "did not receive flush compl\n");
2852                                 break;
2853                         }
2854                         be_cq_notify(adapter, rx_cq->id, true, 0);
2855                         mdelay(1);
2856                 } else {
2857                         be_rx_compl_discard(rxo, rxcp);
2858                         be_cq_notify(adapter, rx_cq->id, false, 1);
2859                         if (rxcp->num_rcvd == 0)
2860                                 break;
2861                 }
2862         }
2863
2864         /* After cleanup, leave the CQ in unarmed state */
2865         be_cq_notify(adapter, rx_cq->id, false, 0);
2866 }
2867
2868 static void be_tx_compl_clean(struct be_adapter *adapter)
2869 {
2870         struct device *dev = &adapter->pdev->dev;
2871         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2872         struct be_tx_compl_info *txcp;
2873         struct be_queue_info *txq;
2874         u32 end_idx, notified_idx;
2875         struct be_tx_obj *txo;
2876         int i, pending_txqs;
2877
2878         /* Stop polling for compls when HW has been silent for 10ms */
2879         do {
2880                 pending_txqs = adapter->num_tx_qs;
2881
2882                 for_all_tx_queues(adapter, txo, i) {
2883                         cmpl = 0;
2884                         num_wrbs = 0;
2885                         txq = &txo->q;
2886                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2887                                 num_wrbs +=
2888                                         be_tx_compl_process(adapter, txo,
2889                                                             txcp->end_index);
2890                                 cmpl++;
2891                         }
2892                         if (cmpl) {
2893                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2894                                 atomic_sub(num_wrbs, &txq->used);
2895                                 timeo = 0;
2896                         }
2897                         if (!be_is_tx_compl_pending(txo))
2898                                 pending_txqs--;
2899                 }
2900
2901                 if (pending_txqs == 0 || ++timeo > 10 ||
2902                     be_check_error(adapter, BE_ERROR_HW))
2903                         break;
2904
2905                 mdelay(1);
2906         } while (true);
2907
2908         /* Free enqueued TX that was never notified to HW */
2909         for_all_tx_queues(adapter, txo, i) {
2910                 txq = &txo->q;
2911
2912                 if (atomic_read(&txq->used)) {
2913                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2914                                  i, atomic_read(&txq->used));
2915                         notified_idx = txq->tail;
2916                         end_idx = txq->tail;
2917                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2918                                   txq->len);
2919                         /* Use the tx-compl process logic to handle requests
2920                          * that were not sent to the HW.
2921                          */
2922                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2923                         atomic_sub(num_wrbs, &txq->used);
2924                         BUG_ON(atomic_read(&txq->used));
2925                         txo->pend_wrb_cnt = 0;
2926                         /* Since hw was never notified of these requests,
2927                          * reset TXQ indices
2928                          */
2929                         txq->head = notified_idx;
2930                         txq->tail = notified_idx;
2931                 }
2932         }
2933 }
2934
2935 static void be_evt_queues_destroy(struct be_adapter *adapter)
2936 {
2937         struct be_eq_obj *eqo;
2938         int i;
2939
2940         for_all_evt_queues(adapter, eqo, i) {
2941                 if (eqo->q.created) {
2942                         be_eq_clean(eqo);
2943                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2944                         netif_napi_del(&eqo->napi);
2945                         free_cpumask_var(eqo->affinity_mask);
2946                 }
2947                 be_queue_free(adapter, &eqo->q);
2948         }
2949 }
2950
2951 static int be_evt_queues_create(struct be_adapter *adapter)
2952 {
2953         struct be_queue_info *eq;
2954         struct be_eq_obj *eqo;
2955         struct be_aic_obj *aic;
2956         int i, rc;
2957
2958         /* need enough EQs to service both RX and TX queues */
2959         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2960                                     max(adapter->cfg_num_rx_irqs,
2961                                         adapter->cfg_num_tx_irqs));
2962
2963         adapter->aic_enabled = true;
2964
2965         for_all_evt_queues(adapter, eqo, i) {
2966                 int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968                 aic = &adapter->aic_obj[i];
2969                 eqo->adapter = adapter;
2970                 eqo->idx = i;
2971                 aic->max_eqd = BE_MAX_EQD;
2972
2973                 eq = &eqo->q;
2974                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2975                                     sizeof(struct be_eq_entry));
2976                 if (rc)
2977                         return rc;
2978
2979                 rc = be_cmd_eq_create(adapter, eqo);
2980                 if (rc)
2981                         return rc;
2982
2983                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2984                         return -ENOMEM;
2985                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2986                                 eqo->affinity_mask);
2987                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll);
2988         }
2989         return 0;
2990 }
2991
2992 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993 {
2994         struct be_queue_info *q;
2995
2996         q = &adapter->mcc_obj.q;
2997         if (q->created)
2998                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999         be_queue_free(adapter, q);
3000
3001         q = &adapter->mcc_obj.cq;
3002         if (q->created)
3003                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004         be_queue_free(adapter, q);
3005 }
3006
3007 /* Must be called only after TX qs are created as MCC shares TX EQ */
3008 static int be_mcc_queues_create(struct be_adapter *adapter)
3009 {
3010         struct be_queue_info *q, *cq;
3011
3012         cq = &adapter->mcc_obj.cq;
3013         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014                            sizeof(struct be_mcc_compl)))
3015                 goto err;
3016
3017         /* Use the default EQ for MCC completions */
3018         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019                 goto mcc_cq_free;
3020
3021         q = &adapter->mcc_obj.q;
3022         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023                 goto mcc_cq_destroy;
3024
3025         if (be_cmd_mccq_create(adapter, q, cq))
3026                 goto mcc_q_free;
3027
3028         return 0;
3029
3030 mcc_q_free:
3031         be_queue_free(adapter, q);
3032 mcc_cq_destroy:
3033         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034 mcc_cq_free:
3035         be_queue_free(adapter, cq);
3036 err:
3037         return -1;
3038 }
3039
3040 static void be_tx_queues_destroy(struct be_adapter *adapter)
3041 {
3042         struct be_queue_info *q;
3043         struct be_tx_obj *txo;
3044         u8 i;
3045
3046         for_all_tx_queues(adapter, txo, i) {
3047                 q = &txo->q;
3048                 if (q->created)
3049                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050                 be_queue_free(adapter, q);
3051
3052                 q = &txo->cq;
3053                 if (q->created)
3054                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055                 be_queue_free(adapter, q);
3056         }
3057 }
3058
3059 static int be_tx_qs_create(struct be_adapter *adapter)
3060 {
3061         struct be_queue_info *cq;
3062         struct be_tx_obj *txo;
3063         struct be_eq_obj *eqo;
3064         int status, i;
3065
3066         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068         for_all_tx_queues(adapter, txo, i) {
3069                 cq = &txo->cq;
3070                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071                                         sizeof(struct be_eth_tx_compl));
3072                 if (status)
3073                         return status;
3074
3075                 u64_stats_init(&txo->stats.sync);
3076                 u64_stats_init(&txo->stats.sync_compl);
3077
3078                 /* If num_evt_qs is less than num_tx_qs, then more than
3079                  * one txq share an eq
3080                  */
3081                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083                 if (status)
3084                         return status;
3085
3086                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087                                         sizeof(struct be_eth_wrb));
3088                 if (status)
3089                         return status;
3090
3091                 status = be_cmd_txq_create(adapter, txo);
3092                 if (status)
3093                         return status;
3094
3095                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096                                     eqo->idx);
3097         }
3098
3099         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100                  adapter->num_tx_qs);
3101         return 0;
3102 }
3103
3104 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105 {
3106         struct be_queue_info *q;
3107         struct be_rx_obj *rxo;
3108         int i;
3109
3110         for_all_rx_queues(adapter, rxo, i) {
3111                 q = &rxo->cq;
3112                 if (q->created)
3113                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114                 be_queue_free(adapter, q);
3115         }
3116 }
3117
3118 static int be_rx_cqs_create(struct be_adapter *adapter)
3119 {
3120         struct be_queue_info *eq, *cq;
3121         struct be_rx_obj *rxo;
3122         int rc, i;
3123
3124         adapter->num_rss_qs =
3125                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
3127         /* We'll use RSS only if atleast 2 RSS rings are supported. */
3128         if (adapter->num_rss_qs < 2)
3129                 adapter->num_rss_qs = 0;
3130
3131         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133         /* When the interface is not capable of RSS rings (and there is no
3134          * need to create a default RXQ) we'll still need one RXQ
3135          */
3136         if (adapter->num_rx_qs == 0)
3137                 adapter->num_rx_qs = 1;
3138
3139         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140         for_all_rx_queues(adapter, rxo, i) {
3141                 rxo->adapter = adapter;
3142                 cq = &rxo->cq;
3143                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144                                     sizeof(struct be_eth_rx_compl));
3145                 if (rc)
3146                         return rc;
3147
3148                 u64_stats_init(&rxo->stats.sync);
3149                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151                 if (rc)
3152                         return rc;
3153         }
3154
3155         dev_info(&adapter->pdev->dev,
3156                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3157         return 0;
3158 }
3159
3160 static irqreturn_t be_intx(int irq, void *dev)
3161 {
3162         struct be_eq_obj *eqo = dev;
3163         struct be_adapter *adapter = eqo->adapter;
3164         int num_evts = 0;
3165
3166         /* IRQ is not expected when NAPI is scheduled as the EQ
3167          * will not be armed.
3168          * But, this can happen on Lancer INTx where it takes
3169          * a while to de-assert INTx or in BE2 where occasionaly
3170          * an interrupt may be raised even when EQ is unarmed.
3171          * If NAPI is already scheduled, then counting & notifying
3172          * events will orphan them.
3173          */
3174         if (napi_schedule_prep(&eqo->napi)) {
3175                 num_evts = events_get(eqo);
3176                 __napi_schedule(&eqo->napi);
3177                 if (num_evts)
3178                         eqo->spurious_intr = 0;
3179         }
3180         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
3182         /* Return IRQ_HANDLED only for the first spurious intr
3183          * after a valid intr to stop the kernel from branding
3184          * this irq as a bad one!
3185          */
3186         if (num_evts || eqo->spurious_intr++ == 0)
3187                 return IRQ_HANDLED;
3188         else
3189                 return IRQ_NONE;
3190 }
3191
3192 static irqreturn_t be_msix(int irq, void *dev)
3193 {
3194         struct be_eq_obj *eqo = dev;
3195
3196         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197         napi_schedule(&eqo->napi);
3198         return IRQ_HANDLED;
3199 }
3200
3201 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202 {
3203         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3204 }
3205
3206 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207                          int budget)
3208 {
3209         struct be_adapter *adapter = rxo->adapter;
3210         struct be_queue_info *rx_cq = &rxo->cq;
3211         struct be_rx_compl_info *rxcp;
3212         u32 work_done;
3213         u32 frags_consumed = 0;
3214
3215         for (work_done = 0; work_done < budget; work_done++) {
3216                 rxcp = be_rx_compl_get(rxo);
3217                 if (!rxcp)
3218                         break;
3219
3220                 /* Is it a flush compl that has no data */
3221                 if (unlikely(rxcp->num_rcvd == 0))
3222                         goto loop_continue;
3223
3224                 /* Discard compl with partial DMA Lancer B0 */
3225                 if (unlikely(!rxcp->pkt_size)) {
3226                         be_rx_compl_discard(rxo, rxcp);
3227                         goto loop_continue;
3228                 }
3229
3230                 /* On BE drop pkts that arrive due to imperfect filtering in
3231                  * promiscuous mode on some skews
3232                  */
3233                 if (unlikely(rxcp->port != adapter->port_num &&
3234                              !lancer_chip(adapter))) {
3235                         be_rx_compl_discard(rxo, rxcp);
3236                         goto loop_continue;
3237                 }
3238
3239                 if (do_gro(rxcp))
3240                         be_rx_compl_process_gro(rxo, napi, rxcp);
3241                 else
3242                         be_rx_compl_process(rxo, napi, rxcp);
3243
3244 loop_continue:
3245                 frags_consumed += rxcp->num_rcvd;
3246                 be_rx_stats_update(rxo, rxcp);
3247         }
3248
3249         if (work_done) {
3250                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252                 /* When an rx-obj gets into post_starved state, just
3253                  * let be_worker do the posting.
3254                  */
3255                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256                     !rxo->rx_post_starved)
3257                         be_post_rx_frags(rxo, GFP_ATOMIC,
3258                                          max_t(u32, MAX_RX_POST,
3259                                                frags_consumed));
3260         }
3261
3262         return work_done;
3263 }
3264
3265
3266 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267                           int idx)
3268 {
3269         int num_wrbs = 0, work_done = 0;
3270         struct be_tx_compl_info *txcp;
3271
3272         while ((txcp = be_tx_compl_get(adapter, txo))) {
3273                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274                 work_done++;
3275         }
3276
3277         if (work_done) {
3278                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3279                 atomic_sub(num_wrbs, &txo->q.used);
3280
3281                 /* As Tx wrbs have been freed up, wake up netdev queue
3282                  * if it was stopped due to lack of tx wrbs.  */
3283                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284                     be_can_txq_wake(txo)) {
3285                         netif_wake_subqueue(adapter->netdev, idx);
3286                 }
3287
3288                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289                 tx_stats(txo)->tx_compl += work_done;
3290                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291         }
3292 }
3293
3294 int be_poll(struct napi_struct *napi, int budget)
3295 {
3296         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297         struct be_adapter *adapter = eqo->adapter;
3298         int max_work = 0, work, i, num_evts;
3299         struct be_rx_obj *rxo;
3300         struct be_tx_obj *txo;
3301         u32 mult_enc = 0;
3302
3303         num_evts = events_get(eqo);
3304
3305         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306                 be_process_tx(adapter, txo, i);
3307
3308         /* This loop will iterate twice for EQ0 in which
3309          * completions of the last RXQ (default one) are also processed
3310          * For other EQs the loop iterates only once
3311          */
3312         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313                 work = be_process_rx(rxo, napi, budget);
3314                 max_work = max(work, max_work);
3315         }
3316
3317         if (is_mcc_eqo(eqo))
3318                 be_process_mcc(adapter);
3319
3320         if (max_work < budget) {
3321                 napi_complete_done(napi, max_work);
3322
3323                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3324                  * delay via a delay multiplier encoding value
3325                  */
3326                 if (skyhawk_chip(adapter))
3327                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330                              mult_enc);
3331         } else {
3332                 /* As we'll continue in polling mode, count and clear events */
3333                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334         }
3335         return max_work;
3336 }
3337
3338 void be_detect_error(struct be_adapter *adapter)
3339 {
3340         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342         struct device *dev = &adapter->pdev->dev;
3343         u16 val;
3344         u32 i;
3345
3346         if (be_check_error(adapter, BE_ERROR_HW))
3347                 return;
3348
3349         if (lancer_chip(adapter)) {
3350                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352                         be_set_error(adapter, BE_ERROR_UE);
3353                         sliport_err1 = ioread32(adapter->db +
3354                                                 SLIPORT_ERROR1_OFFSET);
3355                         sliport_err2 = ioread32(adapter->db +
3356                                                 SLIPORT_ERROR2_OFFSET);
3357                         /* Do not log error messages if its a FW reset */
3358                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360                                 dev_info(dev, "Reset is in progress\n");
3361                         } else {
3362                                 dev_err(dev, "Error detected in the card\n");
3363                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3364                                         sliport_status);
3365                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3366                                         sliport_err1);
3367                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3368                                         sliport_err2);
3369                         }
3370                 }
3371         } else {
3372                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374                 ue_lo_mask = ioread32(adapter->pcicfg +
3375                                       PCICFG_UE_STATUS_LOW_MASK);
3376                 ue_hi_mask = ioread32(adapter->pcicfg +
3377                                       PCICFG_UE_STATUS_HI_MASK);
3378
3379                 ue_lo = (ue_lo & ~ue_lo_mask);
3380                 ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382                 if (ue_lo || ue_hi) {
3383                         /* On certain platforms BE3 hardware can indicate
3384                          * spurious UEs. In case of a UE in the chip,
3385                          * the POST register correctly reports either a
3386                          * FAT_LOG_START state (FW is currently dumping
3387                          * FAT log data) or a ARMFW_UE state. Check for the
3388                          * above states to ascertain if the UE is valid or not.
3389                          */
3390                         if (BE3_chip(adapter)) {
3391                                 val = be_POST_stage_get(adapter);
3392                                 if ((val & POST_STAGE_FAT_LOG_START)
3393                                      != POST_STAGE_FAT_LOG_START &&
3394                                     (val & POST_STAGE_ARMFW_UE)
3395                                      != POST_STAGE_ARMFW_UE &&
3396                                     (val & POST_STAGE_RECOVERABLE_ERR)
3397                                      != POST_STAGE_RECOVERABLE_ERR)
3398                                         return;
3399                         }
3400
3401                         dev_err(dev, "Error detected in the adapter");
3402                         be_set_error(adapter, BE_ERROR_UE);
3403
3404                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405                                 if (ue_lo & 1)
3406                                         dev_err(dev, "UE: %s bit set\n",
3407                                                 ue_status_low_desc[i]);
3408                         }
3409                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410                                 if (ue_hi & 1)
3411                                         dev_err(dev, "UE: %s bit set\n",
3412                                                 ue_status_hi_desc[i]);
3413                         }
3414                 }
3415         }
3416 }
3417
3418 static void be_msix_disable(struct be_adapter *adapter)
3419 {
3420         if (msix_enabled(adapter)) {
3421                 pci_disable_msix(adapter->pdev);
3422                 adapter->num_msix_vec = 0;
3423                 adapter->num_msix_roce_vec = 0;
3424         }
3425 }
3426
3427 static int be_msix_enable(struct be_adapter *adapter)
3428 {
3429         unsigned int i, max_roce_eqs;
3430         struct device *dev = &adapter->pdev->dev;
3431         int num_vec;
3432
3433         /* If RoCE is supported, program the max number of vectors that
3434          * could be used for NIC and RoCE, else, just program the number
3435          * we'll use initially.
3436          */
3437         if (be_roce_supported(adapter)) {
3438                 max_roce_eqs =
3439                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442         } else {
3443                 num_vec = max(adapter->cfg_num_rx_irqs,
3444                               adapter->cfg_num_tx_irqs);
3445         }
3446
3447         for (i = 0; i < num_vec; i++)
3448                 adapter->msix_entries[i].entry = i;
3449
3450         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451                                         MIN_MSIX_VECTORS, num_vec);
3452         if (num_vec < 0)
3453                 goto fail;
3454
3455         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456                 adapter->num_msix_roce_vec = num_vec / 2;
3457                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458                          adapter->num_msix_roce_vec);
3459         }
3460
3461         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464                  adapter->num_msix_vec);
3465         return 0;
3466
3467 fail:
3468         dev_warn(dev, "MSIx enable failed\n");
3469
3470         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471         if (be_virtfn(adapter))
3472                 return num_vec;
3473         return 0;
3474 }
3475
3476 static inline int be_msix_vec_get(struct be_adapter *adapter,
3477                                   struct be_eq_obj *eqo)
3478 {
3479         return adapter->msix_entries[eqo->msix_idx].vector;
3480 }
3481
3482 static int be_msix_register(struct be_adapter *adapter)
3483 {
3484         struct net_device *netdev = adapter->netdev;
3485         struct be_eq_obj *eqo;
3486         int status, i, vec;
3487
3488         for_all_evt_queues(adapter, eqo, i) {
3489                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490                 vec = be_msix_vec_get(adapter, eqo);
3491                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492                 if (status)
3493                         goto err_msix;
3494
3495                 irq_update_affinity_hint(vec, eqo->affinity_mask);
3496         }
3497
3498         return 0;
3499 err_msix:
3500         for (i--; i >= 0; i--) {
3501                 eqo = &adapter->eq_obj[i];
3502                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503         }
3504         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505                  status);
3506         be_msix_disable(adapter);
3507         return status;
3508 }
3509
3510 static int be_irq_register(struct be_adapter *adapter)
3511 {
3512         struct net_device *netdev = adapter->netdev;
3513         int status;
3514
3515         if (msix_enabled(adapter)) {
3516                 status = be_msix_register(adapter);
3517                 if (status == 0)
3518                         goto done;
3519                 /* INTx is not supported for VF */
3520                 if (be_virtfn(adapter))
3521                         return status;
3522         }
3523
3524         /* INTx: only the first EQ is used */
3525         netdev->irq = adapter->pdev->irq;
3526         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527                              &adapter->eq_obj[0]);
3528         if (status) {
3529                 dev_err(&adapter->pdev->dev,
3530                         "INTx request IRQ failed - err %d\n", status);
3531                 return status;
3532         }
3533 done:
3534         adapter->isr_registered = true;
3535         return 0;
3536 }
3537
3538 static void be_irq_unregister(struct be_adapter *adapter)
3539 {
3540         struct net_device *netdev = adapter->netdev;
3541         struct be_eq_obj *eqo;
3542         int i, vec;
3543
3544         if (!adapter->isr_registered)
3545                 return;
3546
3547         /* INTx */
3548         if (!msix_enabled(adapter)) {
3549                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3550                 goto done;
3551         }
3552
3553         /* MSIx */
3554         for_all_evt_queues(adapter, eqo, i) {
3555                 vec = be_msix_vec_get(adapter, eqo);
3556                 irq_update_affinity_hint(vec, NULL);
3557                 free_irq(vec, eqo);
3558         }
3559
3560 done:
3561         adapter->isr_registered = false;
3562 }
3563
3564 static void be_rx_qs_destroy(struct be_adapter *adapter)
3565 {
3566         struct rss_info *rss = &adapter->rss_info;
3567         struct be_queue_info *q;
3568         struct be_rx_obj *rxo;
3569         int i;
3570
3571         for_all_rx_queues(adapter, rxo, i) {
3572                 q = &rxo->q;
3573                 if (q->created) {
3574                         /* If RXQs are destroyed while in an "out of buffer"
3575                          * state, there is a possibility of an HW stall on
3576                          * Lancer. So, post 64 buffers to each queue to relieve
3577                          * the "out of buffer" condition.
3578                          * Make sure there's space in the RXQ before posting.
3579                          */
3580                         if (lancer_chip(adapter)) {
3581                                 be_rx_cq_clean(rxo);
3582                                 if (atomic_read(&q->used) == 0)
3583                                         be_post_rx_frags(rxo, GFP_KERNEL,
3584                                                          MAX_RX_POST);
3585                         }
3586
3587                         be_cmd_rxq_destroy(adapter, q);
3588                         be_rx_cq_clean(rxo);
3589                         be_rxq_clean(rxo);
3590                 }
3591                 be_queue_free(adapter, q);
3592         }
3593
3594         if (rss->rss_flags) {
3595                 rss->rss_flags = RSS_ENABLE_NONE;
3596                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597                                   128, rss->rss_hkey);
3598         }
3599 }
3600
3601 static void be_disable_if_filters(struct be_adapter *adapter)
3602 {
3603         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3604         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607                 eth_zero_addr(adapter->dev_mac);
3608         }
3609
3610         be_clear_uc_list(adapter);
3611         be_clear_mc_list(adapter);
3612
3613         /* The IFACE flags are enabled in the open path and cleared
3614          * in the close path. When a VF gets detached from the host and
3615          * assigned to a VM the following happens:
3616          *      - VF's IFACE flags get cleared in the detach path
3617          *      - IFACE create is issued by the VF in the attach path
3618          * Due to a bug in the BE3/Skyhawk-R FW
3619          * (Lancer FW doesn't have the bug), the IFACE capability flags
3620          * specified along with the IFACE create cmd issued by a VF are not
3621          * honoured by FW.  As a consequence, if a *new* driver
3622          * (that enables/disables IFACE flags in open/close)
3623          * is loaded in the host and an *old* driver is * used by a VM/VF,
3624          * the IFACE gets created *without* the needed flags.
3625          * To avoid this, disable RX-filter flags only for Lancer.
3626          */
3627         if (lancer_chip(adapter)) {
3628                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630         }
3631 }
3632
3633 static int be_close(struct net_device *netdev)
3634 {
3635         struct be_adapter *adapter = netdev_priv(netdev);
3636         struct be_eq_obj *eqo;
3637         int i;
3638
3639         /* This protection is needed as be_close() may be called even when the
3640          * adapter is in cleared state (after eeh perm failure)
3641          */
3642         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643                 return 0;
3644
3645         /* Before attempting cleanup ensure all the pending cmds in the
3646          * config_wq have finished execution
3647          */
3648         flush_workqueue(be_wq);
3649
3650         be_disable_if_filters(adapter);
3651
3652         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653                 for_all_evt_queues(adapter, eqo, i) {
3654                         napi_disable(&eqo->napi);
3655                 }
3656                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657         }
3658
3659         be_async_mcc_disable(adapter);
3660
3661         /* Wait for all pending tx completions to arrive so that
3662          * all tx skbs are freed.
3663          */
3664         netif_tx_disable(netdev);
3665         be_tx_compl_clean(adapter);
3666
3667         be_rx_qs_destroy(adapter);
3668
3669         for_all_evt_queues(adapter, eqo, i) {
3670                 if (msix_enabled(adapter))
3671                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3672                 else
3673                         synchronize_irq(netdev->irq);
3674                 be_eq_clean(eqo);
3675         }
3676
3677         be_irq_unregister(adapter);
3678
3679         return 0;
3680 }
3681
3682 static int be_rx_qs_create(struct be_adapter *adapter)
3683 {
3684         struct rss_info *rss = &adapter->rss_info;
3685         u8 rss_key[RSS_HASH_KEY_LEN];
3686         struct be_rx_obj *rxo;
3687         int rc, i, j;
3688
3689         for_all_rx_queues(adapter, rxo, i) {
3690                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691                                     sizeof(struct be_eth_rx_d));
3692                 if (rc)
3693                         return rc;
3694         }
3695
3696         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697                 rxo = default_rxo(adapter);
3698                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699                                        rx_frag_size, adapter->if_handle,
3700                                        false, &rxo->rss_id);
3701                 if (rc)
3702                         return rc;
3703         }
3704
3705         for_all_rss_queues(adapter, rxo, i) {
3706                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707                                        rx_frag_size, adapter->if_handle,
3708                                        true, &rxo->rss_id);
3709                 if (rc)
3710                         return rc;
3711         }
3712
3713         if (be_multi_rxq(adapter)) {
3714                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715                         for_all_rss_queues(adapter, rxo, i) {
3716                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717                                         break;
3718                                 rss->rsstable[j + i] = rxo->rss_id;
3719                                 rss->rss_queue[j + i] = i;
3720                         }
3721                 }
3722                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725                 if (!BEx_chip(adapter))
3726                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727                                 RSS_ENABLE_UDP_IPV6;
3728
3729                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731                                        RSS_INDIR_TABLE_LEN, rss_key);
3732                 if (rc) {
3733                         rss->rss_flags = RSS_ENABLE_NONE;
3734                         return rc;
3735                 }
3736
3737                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738         } else {
3739                 /* Disable RSS, if only default RX Q is created */
3740                 rss->rss_flags = RSS_ENABLE_NONE;
3741         }
3742
3743
3744         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3745          * which is a queue empty condition
3746          */
3747         for_all_rx_queues(adapter, rxo, i)
3748                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750         return 0;
3751 }
3752
3753 static int be_enable_if_filters(struct be_adapter *adapter)
3754 {
3755         int status;
3756
3757         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758         if (status)
3759                 return status;
3760
3761         /* Normally this condition usually true as the ->dev_mac is zeroed.
3762          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763          * subsequent be_dev_mac_add() can fail (after fresh boot)
3764          */
3765         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766                 int old_pmac_id = -1;
3767
3768                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3769                 if (!is_zero_ether_addr(adapter->dev_mac))
3770                         old_pmac_id = adapter->pmac_id[0];
3771
3772                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773                 if (status)
3774                         return status;
3775
3776                 /* Delete the old programmed MAC as we successfully programmed
3777                  * a new MAC
3778                  */
3779                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780                         be_dev_mac_del(adapter, old_pmac_id);
3781
3782                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783         }
3784
3785         if (adapter->vlans_added)
3786                 be_vid_config(adapter);
3787
3788         __be_set_rx_mode(adapter);
3789
3790         return 0;
3791 }
3792
3793 static int be_open(struct net_device *netdev)
3794 {
3795         struct be_adapter *adapter = netdev_priv(netdev);
3796         struct be_eq_obj *eqo;
3797         struct be_rx_obj *rxo;
3798         struct be_tx_obj *txo;
3799         u8 link_status;
3800         int status, i;
3801
3802         status = be_rx_qs_create(adapter);
3803         if (status)
3804                 goto err;
3805
3806         status = be_enable_if_filters(adapter);
3807         if (status)
3808                 goto err;
3809
3810         status = be_irq_register(adapter);
3811         if (status)
3812                 goto err;
3813
3814         for_all_rx_queues(adapter, rxo, i)
3815                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817         for_all_tx_queues(adapter, txo, i)
3818                 be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820         be_async_mcc_enable(adapter);
3821
3822         for_all_evt_queues(adapter, eqo, i) {
3823                 napi_enable(&eqo->napi);
3824                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825         }
3826         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829         if (!status)
3830                 be_link_status_update(adapter, link_status);
3831
3832         netif_tx_start_all_queues(netdev);
3833
3834         udp_tunnel_nic_reset_ntf(netdev);
3835
3836         return 0;
3837 err:
3838         be_close(adapter->netdev);
3839         return -EIO;
3840 }
3841
3842 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843 {
3844         u32 addr;
3845
3846         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848         mac[5] = (u8)(addr & 0xFF);
3849         mac[4] = (u8)((addr >> 8) & 0xFF);
3850         mac[3] = (u8)((addr >> 16) & 0xFF);
3851         /* Use the OUI from the current MAC address */
3852         memcpy(mac, adapter->netdev->dev_addr, 3);
3853 }
3854
3855 /*
3856  * Generate a seed MAC address from the PF MAC Address using jhash.
3857  * MAC Address for VFs are assigned incrementally starting from the seed.
3858  * These addresses are programmed in the ASIC by the PF and the VF driver
3859  * queries for the MAC address during its probe.
3860  */
3861 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862 {
3863         u32 vf;
3864         int status = 0;
3865         u8 mac[ETH_ALEN];
3866         struct be_vf_cfg *vf_cfg;
3867
3868         be_vf_eth_addr_generate(adapter, mac);
3869
3870         for_all_vfs(adapter, vf_cfg, vf) {
3871                 if (BEx_chip(adapter))
3872                         status = be_cmd_pmac_add(adapter, mac,
3873                                                  vf_cfg->if_handle,
3874                                                  &vf_cfg->pmac_id, vf + 1);
3875                 else
3876                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877                                                 vf + 1);
3878
3879                 if (status)
3880                         dev_err(&adapter->pdev->dev,
3881                                 "Mac address assignment failed for VF %d\n",
3882                                 vf);
3883                 else
3884                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886                 mac[5] += 1;
3887         }
3888         return status;
3889 }
3890
3891 static int be_vfs_mac_query(struct be_adapter *adapter)
3892 {
3893         int status, vf;
3894         u8 mac[ETH_ALEN];
3895         struct be_vf_cfg *vf_cfg;
3896
3897         for_all_vfs(adapter, vf_cfg, vf) {
3898                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899                                                mac, vf_cfg->if_handle,
3900                                                false, vf+1);
3901                 if (status)
3902                         return status;
3903                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904         }
3905         return 0;
3906 }
3907
3908 static void be_vf_clear(struct be_adapter *adapter)
3909 {
3910         struct be_vf_cfg *vf_cfg;
3911         u32 vf;
3912
3913         if (pci_vfs_assigned(adapter->pdev)) {
3914                 dev_warn(&adapter->pdev->dev,
3915                          "VFs are assigned to VMs: not disabling VFs\n");
3916                 goto done;
3917         }
3918
3919         pci_disable_sriov(adapter->pdev);
3920
3921         for_all_vfs(adapter, vf_cfg, vf) {
3922                 if (BEx_chip(adapter))
3923                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924                                         vf_cfg->pmac_id, vf + 1);
3925                 else
3926                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927                                        vf + 1);
3928
3929                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930         }
3931
3932         if (BE3_chip(adapter))
3933                 be_cmd_set_hsw_config(adapter, 0, 0,
3934                                       adapter->if_handle,
3935                                       PORT_FWD_TYPE_PASSTHRU, 0);
3936 done:
3937         kfree(adapter->vf_cfg);
3938         adapter->num_vfs = 0;
3939         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940 }
3941
3942 static void be_clear_queues(struct be_adapter *adapter)
3943 {
3944         be_mcc_queues_destroy(adapter);
3945         be_rx_cqs_destroy(adapter);
3946         be_tx_queues_destroy(adapter);
3947         be_evt_queues_destroy(adapter);
3948 }
3949
3950 static void be_cancel_worker(struct be_adapter *adapter)
3951 {
3952         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953                 cancel_delayed_work_sync(&adapter->work);
3954                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955         }
3956 }
3957
3958 static void be_cancel_err_detection(struct be_adapter *adapter)
3959 {
3960         struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962         if (!be_err_recovery_workq)
3963                 return;
3964
3965         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3967                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968         }
3969 }
3970
3971 /* VxLAN offload Notes:
3972  *
3973  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3974  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3975  * is expected to work across all types of IP tunnels once exported. Skyhawk
3976  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3977  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3978  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3979  * those other tunnels are unexported on the fly through ndo_features_check().
3980  */
3981 static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3982                              unsigned int entry, struct udp_tunnel_info *ti)
3983 {
3984         struct be_adapter *adapter = netdev_priv(netdev);
3985         struct device *dev = &adapter->pdev->dev;
3986         int status;
3987
3988         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3989                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3990         if (status) {
3991                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3992                 return status;
3993         }
3994         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3995
3996         status = be_cmd_set_vxlan_port(adapter, ti->port);
3997         if (status) {
3998                 dev_warn(dev, "Failed to add VxLAN port\n");
3999                 return status;
4000         }
4001         adapter->vxlan_port = ti->port;
4002
4003         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4004                                    NETIF_F_TSO | NETIF_F_TSO6 |
4005                                    NETIF_F_GSO_UDP_TUNNEL;
4006
4007         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4008                  be16_to_cpu(ti->port));
4009         return 0;
4010 }
4011
4012 static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4013                                unsigned int entry, struct udp_tunnel_info *ti)
4014 {
4015         struct be_adapter *adapter = netdev_priv(netdev);
4016
4017         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018                 be_cmd_manage_iface(adapter, adapter->if_handle,
4019                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021         if (adapter->vxlan_port)
4022                 be_cmd_set_vxlan_port(adapter, 0);
4023
4024         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025         adapter->vxlan_port = 0;
4026
4027         netdev->hw_enc_features = 0;
4028         return 0;
4029 }
4030
4031 static const struct udp_tunnel_nic_info be_udp_tunnels = {
4032         .set_port       = be_vxlan_set_port,
4033         .unset_port     = be_vxlan_unset_port,
4034         .flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4035                           UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4036         .tables         = {
4037                 { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4038         },
4039 };
4040
4041 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4042                                 struct be_resources *vft_res)
4043 {
4044         struct be_resources res = adapter->pool_res;
4045         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4046         struct be_resources res_mod = {0};
4047         u16 num_vf_qs = 1;
4048
4049         /* Distribute the queue resources among the PF and it's VFs */
4050         if (num_vfs) {
4051                 /* Divide the rx queues evenly among the VFs and the PF, capped
4052                  * at VF-EQ-count. Any remainder queues belong to the PF.
4053                  */
4054                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4055                                 res.max_rss_qs / (num_vfs + 1));
4056
4057                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4058                  * RSS Tables per port. Provide RSS on VFs, only if number of
4059                  * VFs requested is less than it's PF Pool's RSS Tables limit.
4060                  */
4061                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4062                         num_vf_qs = 1;
4063         }
4064
4065         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4066          * which are modifiable using SET_PROFILE_CONFIG cmd.
4067          */
4068         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4069                                   RESOURCE_MODIFIABLE, 0);
4070
4071         /* If RSS IFACE capability flags are modifiable for a VF, set the
4072          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4073          * more than 1 RSSQ is available for a VF.
4074          * Otherwise, provision only 1 queue pair for VF.
4075          */
4076         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4077                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078                 if (num_vf_qs > 1) {
4079                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4080                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4081                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4082                 } else {
4083                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4084                                              BE_IF_FLAGS_DEFQ_RSS);
4085                 }
4086         } else {
4087                 num_vf_qs = 1;
4088         }
4089
4090         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4091                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4092                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4093         }
4094
4095         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4096         vft_res->max_rx_qs = num_vf_qs;
4097         vft_res->max_rss_qs = num_vf_qs;
4098         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4099         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4100
4101         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4102          * among the PF and it's VFs, if the fields are changeable
4103          */
4104         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4105                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4106
4107         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4108                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4109
4110         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4111                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4112
4113         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4114                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4115 }
4116
4117 static void be_if_destroy(struct be_adapter *adapter)
4118 {
4119         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4120
4121         kfree(adapter->pmac_id);
4122         adapter->pmac_id = NULL;
4123
4124         kfree(adapter->mc_list);
4125         adapter->mc_list = NULL;
4126
4127         kfree(adapter->uc_list);
4128         adapter->uc_list = NULL;
4129 }
4130
4131 static int be_clear(struct be_adapter *adapter)
4132 {
4133         struct pci_dev *pdev = adapter->pdev;
4134         struct  be_resources vft_res = {0};
4135
4136         be_cancel_worker(adapter);
4137
4138         flush_workqueue(be_wq);
4139
4140         if (sriov_enabled(adapter))
4141                 be_vf_clear(adapter);
4142
4143         /* Re-configure FW to distribute resources evenly across max-supported
4144          * number of VFs, only when VFs are not already enabled.
4145          */
4146         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4147             !pci_vfs_assigned(pdev)) {
4148                 be_calculate_vf_res(adapter,
4149                                     pci_sriov_get_totalvfs(pdev),
4150                                     &vft_res);
4151                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4152                                         pci_sriov_get_totalvfs(pdev),
4153                                         &vft_res);
4154         }
4155
4156         be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4157
4158         be_if_destroy(adapter);
4159
4160         be_clear_queues(adapter);
4161
4162         be_msix_disable(adapter);
4163         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4164         return 0;
4165 }
4166
4167 static int be_vfs_if_create(struct be_adapter *adapter)
4168 {
4169         struct be_resources res = {0};
4170         u32 cap_flags, en_flags, vf;
4171         struct be_vf_cfg *vf_cfg;
4172         int status;
4173
4174         /* If a FW profile exists, then cap_flags are updated */
4175         cap_flags = BE_VF_IF_EN_FLAGS;
4176
4177         for_all_vfs(adapter, vf_cfg, vf) {
4178                 if (!BE3_chip(adapter)) {
4179                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4180                                                            ACTIVE_PROFILE_TYPE,
4181                                                            RESOURCE_LIMITS,
4182                                                            vf + 1);
4183                         if (!status) {
4184                                 cap_flags = res.if_cap_flags;
4185                                 /* Prevent VFs from enabling VLAN promiscuous
4186                                  * mode
4187                                  */
4188                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4189                         }
4190                 }
4191
4192                 /* PF should enable IF flags during proxy if_create call */
4193                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4194                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4195                                           &vf_cfg->if_handle, vf + 1);
4196                 if (status)
4197                         return status;
4198         }
4199
4200         return 0;
4201 }
4202
4203 static int be_vf_setup_init(struct be_adapter *adapter)
4204 {
4205         struct be_vf_cfg *vf_cfg;
4206         int vf;
4207
4208         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4209                                   GFP_KERNEL);
4210         if (!adapter->vf_cfg)
4211                 return -ENOMEM;
4212
4213         for_all_vfs(adapter, vf_cfg, vf) {
4214                 vf_cfg->if_handle = -1;
4215                 vf_cfg->pmac_id = -1;
4216         }
4217         return 0;
4218 }
4219
4220 static int be_vf_setup(struct be_adapter *adapter)
4221 {
4222         struct device *dev = &adapter->pdev->dev;
4223         struct be_vf_cfg *vf_cfg;
4224         int status, old_vfs, vf;
4225         bool spoofchk;
4226
4227         old_vfs = pci_num_vf(adapter->pdev);
4228
4229         status = be_vf_setup_init(adapter);
4230         if (status)
4231                 goto err;
4232
4233         if (old_vfs) {
4234                 for_all_vfs(adapter, vf_cfg, vf) {
4235                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4236                         if (status)
4237                                 goto err;
4238                 }
4239
4240                 status = be_vfs_mac_query(adapter);
4241                 if (status)
4242                         goto err;
4243         } else {
4244                 status = be_vfs_if_create(adapter);
4245                 if (status)
4246                         goto err;
4247
4248                 status = be_vf_eth_addr_config(adapter);
4249                 if (status)
4250                         goto err;
4251         }
4252
4253         for_all_vfs(adapter, vf_cfg, vf) {
4254                 /* Allow VFs to programs MAC/VLAN filters */
4255                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4256                                                   vf + 1);
4257                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4258                         status = be_cmd_set_fn_privileges(adapter,
4259                                                           vf_cfg->privileges |
4260                                                           BE_PRIV_FILTMGMT,
4261                                                           vf + 1);
4262                         if (!status) {
4263                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4264                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4265                                          vf);
4266                         }
4267                 }
4268
4269                 /* Allow full available bandwidth */
4270                 if (!old_vfs)
4271                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4272
4273                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4274                                                vf_cfg->if_handle, NULL,
4275                                                &spoofchk);
4276                 if (!status)
4277                         vf_cfg->spoofchk = spoofchk;
4278
4279                 if (!old_vfs) {
4280                         be_cmd_enable_vf(adapter, vf + 1);
4281                         be_cmd_set_logical_link_config(adapter,
4282                                                        IFLA_VF_LINK_STATE_AUTO,
4283                                                        vf+1);
4284                 }
4285         }
4286
4287         if (!old_vfs) {
4288                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4289                 if (status) {
4290                         dev_err(dev, "SRIOV enable failed\n");
4291                         adapter->num_vfs = 0;
4292                         goto err;
4293                 }
4294         }
4295
4296         if (BE3_chip(adapter)) {
4297                 /* On BE3, enable VEB only when SRIOV is enabled */
4298                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4299                                                adapter->if_handle,
4300                                                PORT_FWD_TYPE_VEB, 0);
4301                 if (status)
4302                         goto err;
4303         }
4304
4305         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4306         return 0;
4307 err:
4308         dev_err(dev, "VF setup failed\n");
4309         be_vf_clear(adapter);
4310         return status;
4311 }
4312
4313 /* Converting function_mode bits on BE3 to SH mc_type enums */
4314
4315 static u8 be_convert_mc_type(u32 function_mode)
4316 {
4317         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4318                 return vNIC1;
4319         else if (function_mode & QNQ_MODE)
4320                 return FLEX10;
4321         else if (function_mode & VNIC_MODE)
4322                 return vNIC2;
4323         else if (function_mode & UMC_ENABLED)
4324                 return UMC;
4325         else
4326                 return MC_NONE;
4327 }
4328
4329 /* On BE2/BE3 FW does not suggest the supported limits */
4330 static void BEx_get_resources(struct be_adapter *adapter,
4331                               struct be_resources *res)
4332 {
4333         bool use_sriov = adapter->num_vfs ? 1 : 0;
4334
4335         if (be_physfn(adapter))
4336                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4337         else
4338                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4339
4340         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4341
4342         if (be_is_mc(adapter)) {
4343                 /* Assuming that there are 4 channels per port,
4344                  * when multi-channel is enabled
4345                  */
4346                 if (be_is_qnq_mode(adapter))
4347                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4348                 else
4349                         /* In a non-qnq multichannel mode, the pvid
4350                          * takes up one vlan entry
4351                          */
4352                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4353         } else {
4354                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4355         }
4356
4357         res->max_mcast_mac = BE_MAX_MC;
4358
4359         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4360          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4361          *    *only* if it is RSS-capable.
4362          */
4363         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4364             be_virtfn(adapter) ||
4365             (be_is_mc(adapter) &&
4366              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4367                 res->max_tx_qs = 1;
4368         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4369                 struct be_resources super_nic_res = {0};
4370
4371                 /* On a SuperNIC profile, the driver needs to use the
4372                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4373                  */
4374                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4375                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4376                                           0);
4377                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4378                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4379         } else {
4380                 res->max_tx_qs = BE3_MAX_TX_QS;
4381         }
4382
4383         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4384             !use_sriov && be_physfn(adapter))
4385                 res->max_rss_qs = (adapter->be3_native) ?
4386                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4387         res->max_rx_qs = res->max_rss_qs + 1;
4388
4389         if (be_physfn(adapter))
4390                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4391                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4392         else
4393                 res->max_evt_qs = 1;
4394
4395         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4396         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4397         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4398                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4399 }
4400
4401 static void be_setup_init(struct be_adapter *adapter)
4402 {
4403         adapter->vlan_prio_bmap = 0xff;
4404         adapter->phy.link_speed = -1;
4405         adapter->if_handle = -1;
4406         adapter->be3_native = false;
4407         adapter->if_flags = 0;
4408         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4409         if (be_physfn(adapter))
4410                 adapter->cmd_privileges = MAX_PRIVILEGES;
4411         else
4412                 adapter->cmd_privileges = MIN_PRIVILEGES;
4413 }
4414
4415 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4416  * However, this HW limitation is not exposed to the host via any SLI cmd.
4417  * As a result, in the case of SRIOV and in particular multi-partition configs
4418  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4419  * for distribution between the VFs. This self-imposed limit will determine the
4420  * no: of VFs for which RSS can be enabled.
4421  */
4422 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4423 {
4424         struct be_port_resources port_res = {0};
4425         u8 rss_tables_on_port;
4426         u16 max_vfs = be_max_vfs(adapter);
4427
4428         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4429                                   RESOURCE_LIMITS, 0);
4430
4431         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4432
4433         /* Each PF Pool's RSS Tables limit =
4434          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4435          */
4436         adapter->pool_res.max_rss_tables =
4437                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4438 }
4439
4440 static int be_get_sriov_config(struct be_adapter *adapter)
4441 {
4442         struct be_resources res = {0};
4443         int max_vfs, old_vfs;
4444
4445         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4446                                   RESOURCE_LIMITS, 0);
4447
4448         /* Some old versions of BE3 FW don't report max_vfs value */
4449         if (BE3_chip(adapter) && !res.max_vfs) {
4450                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4451                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4452         }
4453
4454         adapter->pool_res = res;
4455
4456         /* If during previous unload of the driver, the VFs were not disabled,
4457          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4458          * Instead use the TotalVFs value stored in the pci-dev struct.
4459          */
4460         old_vfs = pci_num_vf(adapter->pdev);
4461         if (old_vfs) {
4462                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4463                          old_vfs);
4464
4465                 adapter->pool_res.max_vfs =
4466                         pci_sriov_get_totalvfs(adapter->pdev);
4467                 adapter->num_vfs = old_vfs;
4468         }
4469
4470         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4471                 be_calculate_pf_pool_rss_tables(adapter);
4472                 dev_info(&adapter->pdev->dev,
4473                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4474                          be_max_pf_pool_rss_tables(adapter));
4475         }
4476         return 0;
4477 }
4478
4479 static void be_alloc_sriov_res(struct be_adapter *adapter)
4480 {
4481         int old_vfs = pci_num_vf(adapter->pdev);
4482         struct  be_resources vft_res = {0};
4483         int status;
4484
4485         be_get_sriov_config(adapter);
4486
4487         if (!old_vfs)
4488                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4489
4490         /* When the HW is in SRIOV capable configuration, the PF-pool
4491          * resources are given to PF during driver load, if there are no
4492          * old VFs. This facility is not available in BE3 FW.
4493          * Also, this is done by FW in Lancer chip.
4494          */
4495         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4496                 be_calculate_vf_res(adapter, 0, &vft_res);
4497                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4498                                                  &vft_res);
4499                 if (status)
4500                         dev_err(&adapter->pdev->dev,
4501                                 "Failed to optimize SRIOV resources\n");
4502         }
4503 }
4504
4505 static int be_get_resources(struct be_adapter *adapter)
4506 {
4507         struct device *dev = &adapter->pdev->dev;
4508         struct be_resources res = {0};
4509         int status;
4510
4511         /* For Lancer, SH etc read per-function resource limits from FW.
4512          * GET_FUNC_CONFIG returns per function guaranteed limits.
4513          * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4514          */
4515         if (BEx_chip(adapter)) {
4516                 BEx_get_resources(adapter, &res);
4517         } else {
4518                 status = be_cmd_get_func_config(adapter, &res);
4519                 if (status)
4520                         return status;
4521
4522                 /* If a deafault RXQ must be created, we'll use up one RSSQ*/
4523                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4524                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4525                         res.max_rss_qs -= 1;
4526         }
4527
4528         /* If RoCE is supported stash away half the EQs for RoCE */
4529         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4530                                 res.max_evt_qs / 2 : res.max_evt_qs;
4531         adapter->res = res;
4532
4533         /* If FW supports RSS default queue, then skip creating non-RSS
4534          * queue for non-IP traffic.
4535          */
4536         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4537                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4538
4539         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4540                  be_max_txqs(adapter), be_max_rxqs(adapter),
4541                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4542                  be_max_vfs(adapter));
4543         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4544                  be_max_uc(adapter), be_max_mc(adapter),
4545                  be_max_vlans(adapter));
4546
4547         /* Ensure RX and TX queues are created in pairs at init time */
4548         adapter->cfg_num_rx_irqs =
4549                                 min_t(u16, netif_get_num_default_rss_queues(),
4550                                       be_max_qp_irqs(adapter));
4551         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4552         return 0;
4553 }
4554
4555 static int be_get_config(struct be_adapter *adapter)
4556 {
4557         int status, level;
4558         u16 profile_id;
4559
4560         status = be_cmd_get_cntl_attributes(adapter);
4561         if (status)
4562                 return status;
4563
4564         status = be_cmd_query_fw_cfg(adapter);
4565         if (status)
4566                 return status;
4567
4568         if (!lancer_chip(adapter) && be_physfn(adapter))
4569                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4570
4571         if (BEx_chip(adapter)) {
4572                 level = be_cmd_get_fw_log_level(adapter);
4573                 adapter->msg_enable =
4574                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4575         }
4576
4577         be_cmd_get_acpi_wol_cap(adapter);
4578         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4579         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4580
4581         be_cmd_query_port_name(adapter);
4582
4583         if (be_physfn(adapter)) {
4584                 status = be_cmd_get_active_profile(adapter, &profile_id);
4585                 if (!status)
4586                         dev_info(&adapter->pdev->dev,
4587                                  "Using profile 0x%x\n", profile_id);
4588         }
4589
4590         return 0;
4591 }
4592
4593 static int be_mac_setup(struct be_adapter *adapter)
4594 {
4595         u8 mac[ETH_ALEN];
4596         int status;
4597
4598         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4599                 status = be_cmd_get_perm_mac(adapter, mac);
4600                 if (status)
4601                         return status;
4602
4603                 eth_hw_addr_set(adapter->netdev, mac);
4604                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4605
4606                 /* Initial MAC for BE3 VFs is already programmed by PF */
4607                 if (BEx_chip(adapter) && be_virtfn(adapter))
4608                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4609         }
4610
4611         return 0;
4612 }
4613
4614 static void be_schedule_worker(struct be_adapter *adapter)
4615 {
4616         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4617         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4618 }
4619
4620 static void be_destroy_err_recovery_workq(void)
4621 {
4622         if (!be_err_recovery_workq)
4623                 return;
4624
4625         destroy_workqueue(be_err_recovery_workq);
4626         be_err_recovery_workq = NULL;
4627 }
4628
4629 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630 {
4631         struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633         if (!be_err_recovery_workq)
4634                 return;
4635
4636         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637                            msecs_to_jiffies(delay));
4638         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639 }
4640
4641 static int be_setup_queues(struct be_adapter *adapter)
4642 {
4643         struct net_device *netdev = adapter->netdev;
4644         int status;
4645
4646         status = be_evt_queues_create(adapter);
4647         if (status)
4648                 goto err;
4649
4650         status = be_tx_qs_create(adapter);
4651         if (status)
4652                 goto err;
4653
4654         status = be_rx_cqs_create(adapter);
4655         if (status)
4656                 goto err;
4657
4658         status = be_mcc_queues_create(adapter);
4659         if (status)
4660                 goto err;
4661
4662         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663         if (status)
4664                 goto err;
4665
4666         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667         if (status)
4668                 goto err;
4669
4670         return 0;
4671 err:
4672         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673         return status;
4674 }
4675
4676 static int be_if_create(struct be_adapter *adapter)
4677 {
4678         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679         u32 cap_flags = be_if_cap_flags(adapter);
4680
4681         /* alloc required memory for other filtering fields */
4682         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4683                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4684         if (!adapter->pmac_id)
4685                 return -ENOMEM;
4686
4687         adapter->mc_list = kcalloc(be_max_mc(adapter),
4688                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4689         if (!adapter->mc_list)
4690                 return -ENOMEM;
4691
4692         adapter->uc_list = kcalloc(be_max_uc(adapter),
4693                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4694         if (!adapter->uc_list)
4695                 return -ENOMEM;
4696
4697         if (adapter->cfg_num_rx_irqs == 1)
4698                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4699
4700         en_flags &= cap_flags;
4701         /* will enable all the needed filter flags in be_open() */
4702         return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4703                                   &adapter->if_handle, 0);
4704 }
4705
4706 int be_update_queues(struct be_adapter *adapter)
4707 {
4708         struct net_device *netdev = adapter->netdev;
4709         int status;
4710
4711         if (netif_running(netdev)) {
4712                 /* be_tx_timeout() must not run concurrently with this
4713                  * function, synchronize with an already-running dev_watchdog
4714                  */
4715                 netif_tx_lock_bh(netdev);
4716                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4717                 netif_carrier_off(netdev);
4718                 netif_tx_unlock_bh(netdev);
4719
4720                 be_close(netdev);
4721         }
4722
4723         be_cancel_worker(adapter);
4724
4725         /* If any vectors have been shared with RoCE we cannot re-program
4726          * the MSIx table.
4727          */
4728         if (!adapter->num_msix_roce_vec)
4729                 be_msix_disable(adapter);
4730
4731         be_clear_queues(adapter);
4732         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4733         if (status)
4734                 return status;
4735
4736         if (!msix_enabled(adapter)) {
4737                 status = be_msix_enable(adapter);
4738                 if (status)
4739                         return status;
4740         }
4741
4742         status = be_if_create(adapter);
4743         if (status)
4744                 return status;
4745
4746         status = be_setup_queues(adapter);
4747         if (status)
4748                 return status;
4749
4750         be_schedule_worker(adapter);
4751
4752         /* The IF was destroyed and re-created. We need to clear
4753          * all promiscuous flags valid for the destroyed IF.
4754          * Without this promisc mode is not restored during
4755          * be_open() because the driver thinks that it is
4756          * already enabled in HW.
4757          */
4758         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4759
4760         if (netif_running(netdev))
4761                 status = be_open(netdev);
4762
4763         return status;
4764 }
4765
4766 static inline int fw_major_num(const char *fw_ver)
4767 {
4768         int fw_major = 0, i;
4769
4770         i = sscanf(fw_ver, "%d.", &fw_major);
4771         if (i != 1)
4772                 return 0;
4773
4774         return fw_major;
4775 }
4776
4777 /* If it is error recovery, FLR the PF
4778  * Else if any VFs are already enabled don't FLR the PF
4779  */
4780 static bool be_reset_required(struct be_adapter *adapter)
4781 {
4782         if (be_error_recovering(adapter))
4783                 return true;
4784         else
4785                 return pci_num_vf(adapter->pdev) == 0;
4786 }
4787
4788 /* Wait for the FW to be ready and perform the required initialization */
4789 static int be_func_init(struct be_adapter *adapter)
4790 {
4791         int status;
4792
4793         status = be_fw_wait_ready(adapter);
4794         if (status)
4795                 return status;
4796
4797         /* FW is now ready; clear errors to allow cmds/doorbell */
4798         be_clear_error(adapter, BE_CLEAR_ALL);
4799
4800         if (be_reset_required(adapter)) {
4801                 status = be_cmd_reset_function(adapter);
4802                 if (status)
4803                         return status;
4804
4805                 /* Wait for interrupts to quiesce after an FLR */
4806                 msleep(100);
4807         }
4808
4809         /* Tell FW we're ready to fire cmds */
4810         status = be_cmd_fw_init(adapter);
4811         if (status)
4812                 return status;
4813
4814         /* Allow interrupts for other ULPs running on NIC function */
4815         be_intr_set(adapter, true);
4816
4817         return 0;
4818 }
4819
4820 static int be_setup(struct be_adapter *adapter)
4821 {
4822         struct device *dev = &adapter->pdev->dev;
4823         int status;
4824
4825         status = be_func_init(adapter);
4826         if (status)
4827                 return status;
4828
4829         be_setup_init(adapter);
4830
4831         if (!lancer_chip(adapter))
4832                 be_cmd_req_native_mode(adapter);
4833
4834         /* invoke this cmd first to get pf_num and vf_num which are needed
4835          * for issuing profile related cmds
4836          */
4837         if (!BEx_chip(adapter)) {
4838                 status = be_cmd_get_func_config(adapter, NULL);
4839                 if (status)
4840                         return status;
4841         }
4842
4843         status = be_get_config(adapter);
4844         if (status)
4845                 goto err;
4846
4847         if (!BE2_chip(adapter) && be_physfn(adapter))
4848                 be_alloc_sriov_res(adapter);
4849
4850         status = be_get_resources(adapter);
4851         if (status)
4852                 goto err;
4853
4854         status = be_msix_enable(adapter);
4855         if (status)
4856                 goto err;
4857
4858         /* will enable all the needed filter flags in be_open() */
4859         status = be_if_create(adapter);
4860         if (status)
4861                 goto err;
4862
4863         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4864         rtnl_lock();
4865         status = be_setup_queues(adapter);
4866         rtnl_unlock();
4867         if (status)
4868                 goto err;
4869
4870         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4871
4872         status = be_mac_setup(adapter);
4873         if (status)
4874                 goto err;
4875
4876         be_cmd_get_fw_ver(adapter);
4877         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4878
4879         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4880                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4881                         adapter->fw_ver);
4882                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4883         }
4884
4885         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4886                                          adapter->rx_fc);
4887         if (status)
4888                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4889                                         &adapter->rx_fc);
4890
4891         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4892                  adapter->tx_fc, adapter->rx_fc);
4893
4894         if (be_physfn(adapter))
4895                 be_cmd_set_logical_link_config(adapter,
4896                                                IFLA_VF_LINK_STATE_AUTO, 0);
4897
4898         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4899          * confusing a linux bridge or OVS that it might be connected to.
4900          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4901          * when SRIOV is not enabled.
4902          */
4903         if (BE3_chip(adapter))
4904                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4905                                       PORT_FWD_TYPE_PASSTHRU, 0);
4906
4907         if (adapter->num_vfs)
4908                 be_vf_setup(adapter);
4909
4910         status = be_cmd_get_phy_info(adapter);
4911         if (!status && be_pause_supported(adapter))
4912                 adapter->phy.fc_autoneg = 1;
4913
4914         if (be_physfn(adapter) && !lancer_chip(adapter))
4915                 be_cmd_set_features(adapter);
4916
4917         be_schedule_worker(adapter);
4918         adapter->flags |= BE_FLAGS_SETUP_DONE;
4919         return 0;
4920 err:
4921         be_clear(adapter);
4922         return status;
4923 }
4924
4925 #ifdef CONFIG_NET_POLL_CONTROLLER
4926 static void be_netpoll(struct net_device *netdev)
4927 {
4928         struct be_adapter *adapter = netdev_priv(netdev);
4929         struct be_eq_obj *eqo;
4930         int i;
4931
4932         for_all_evt_queues(adapter, eqo, i) {
4933                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4934                 napi_schedule(&eqo->napi);
4935         }
4936 }
4937 #endif
4938
4939 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4940 {
4941         const struct firmware *fw;
4942         int status;
4943
4944         if (!netif_running(adapter->netdev)) {
4945                 dev_err(&adapter->pdev->dev,
4946                         "Firmware load not allowed (interface is down)\n");
4947                 return -ENETDOWN;
4948         }
4949
4950         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4951         if (status)
4952                 goto fw_exit;
4953
4954         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4955
4956         if (lancer_chip(adapter))
4957                 status = lancer_fw_download(adapter, fw);
4958         else
4959                 status = be_fw_download(adapter, fw);
4960
4961         if (!status)
4962                 be_cmd_get_fw_ver(adapter);
4963
4964 fw_exit:
4965         release_firmware(fw);
4966         return status;
4967 }
4968
4969 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4970                                  u16 flags, struct netlink_ext_ack *extack)
4971 {
4972         struct be_adapter *adapter = netdev_priv(dev);
4973         struct nlattr *attr, *br_spec;
4974         int rem;
4975         int status = 0;
4976         u16 mode = 0;
4977
4978         if (!sriov_enabled(adapter))
4979                 return -EOPNOTSUPP;
4980
4981         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4982         if (!br_spec)
4983                 return -EINVAL;
4984
4985         nla_for_each_nested(attr, br_spec, rem) {
4986                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4987                         continue;
4988
4989                 mode = nla_get_u16(attr);
4990                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4991                         return -EOPNOTSUPP;
4992
4993                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4994                         return -EINVAL;
4995
4996                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4997                                                adapter->if_handle,
4998                                                mode == BRIDGE_MODE_VEPA ?
4999                                                PORT_FWD_TYPE_VEPA :
5000                                                PORT_FWD_TYPE_VEB, 0);
5001                 if (status)
5002                         goto err;
5003
5004                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5005                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5006
5007                 return status;
5008         }
5009 err:
5010         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5011                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5012
5013         return status;
5014 }
5015
5016 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5017                                  struct net_device *dev, u32 filter_mask,
5018                                  int nlflags)
5019 {
5020         struct be_adapter *adapter = netdev_priv(dev);
5021         int status = 0;
5022         u8 hsw_mode;
5023
5024         /* BE and Lancer chips support VEB mode only */
5025         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5026                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5027                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5028                         return 0;
5029                 hsw_mode = PORT_FWD_TYPE_VEB;
5030         } else {
5031                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5032                                                adapter->if_handle, &hsw_mode,
5033                                                NULL);
5034                 if (status)
5035                         return 0;
5036
5037                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5038                         return 0;
5039         }
5040
5041         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5042                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5043                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5044                                        0, 0, nlflags, filter_mask, NULL);
5045 }
5046
5047 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5048                                          void (*func)(struct work_struct *))
5049 {
5050         struct be_cmd_work *work;
5051
5052         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5053         if (!work) {
5054                 dev_err(&adapter->pdev->dev,
5055                         "be_work memory allocation failed\n");
5056                 return NULL;
5057         }
5058
5059         INIT_WORK(&work->work, func);
5060         work->adapter = adapter;
5061         return work;
5062 }
5063
5064 static netdev_features_t be_features_check(struct sk_buff *skb,
5065                                            struct net_device *dev,
5066                                            netdev_features_t features)
5067 {
5068         struct be_adapter *adapter = netdev_priv(dev);
5069         u8 l4_hdr = 0;
5070
5071         if (skb_is_gso(skb)) {
5072                 /* IPv6 TSO requests with extension hdrs are a problem
5073                  * to Lancer and BE3 HW. Disable TSO6 feature.
5074                  */
5075                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5076                         features &= ~NETIF_F_TSO6;
5077
5078                 /* Lancer cannot handle the packet with MSS less than 256.
5079                  * Also it can't handle a TSO packet with a single segment
5080                  * Disable the GSO support in such cases
5081                  */
5082                 if (lancer_chip(adapter) &&
5083                     (skb_shinfo(skb)->gso_size < 256 ||
5084                      skb_shinfo(skb)->gso_segs == 1))
5085                         features &= ~NETIF_F_GSO_MASK;
5086         }
5087
5088         /* The code below restricts offload features for some tunneled and
5089          * Q-in-Q packets.
5090          * Offload features for normal (non tunnel) packets are unchanged.
5091          */
5092         features = vlan_features_check(skb, features);
5093         if (!skb->encapsulation ||
5094             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5095                 return features;
5096
5097         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5098          * should disable tunnel offload features if it's not a VxLAN packet,
5099          * as tunnel offloads have been enabled only for VxLAN. This is done to
5100          * allow other tunneled traffic like GRE work fine while VxLAN
5101          * offloads are configured in Skyhawk-R.
5102          */
5103         switch (vlan_get_protocol(skb)) {
5104         case htons(ETH_P_IP):
5105                 l4_hdr = ip_hdr(skb)->protocol;
5106                 break;
5107         case htons(ETH_P_IPV6):
5108                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5109                 break;
5110         default:
5111                 return features;
5112         }
5113
5114         if (l4_hdr != IPPROTO_UDP ||
5115             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5116             skb->inner_protocol != htons(ETH_P_TEB) ||
5117             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5118                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5119             !adapter->vxlan_port ||
5120             udp_hdr(skb)->dest != adapter->vxlan_port)
5121                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5122
5123         return features;
5124 }
5125
5126 static int be_get_phys_port_id(struct net_device *dev,
5127                                struct netdev_phys_item_id *ppid)
5128 {
5129         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5130         struct be_adapter *adapter = netdev_priv(dev);
5131         u8 *id;
5132
5133         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5134                 return -ENOSPC;
5135
5136         ppid->id[0] = adapter->hba_port_num + 1;
5137         id = &ppid->id[1];
5138         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5139              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5140                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5141
5142         ppid->id_len = id_len;
5143
5144         return 0;
5145 }
5146
5147 static void be_set_rx_mode(struct net_device *dev)
5148 {
5149         struct be_adapter *adapter = netdev_priv(dev);
5150         struct be_cmd_work *work;
5151
5152         work = be_alloc_work(adapter, be_work_set_rx_mode);
5153         if (work)
5154                 queue_work(be_wq, &work->work);
5155 }
5156
5157 static const struct net_device_ops be_netdev_ops = {
5158         .ndo_open               = be_open,
5159         .ndo_stop               = be_close,
5160         .ndo_start_xmit         = be_xmit,
5161         .ndo_set_rx_mode        = be_set_rx_mode,
5162         .ndo_set_mac_address    = be_mac_addr_set,
5163         .ndo_get_stats64        = be_get_stats64,
5164         .ndo_validate_addr      = eth_validate_addr,
5165         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5166         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5167         .ndo_set_vf_mac         = be_set_vf_mac,
5168         .ndo_set_vf_vlan        = be_set_vf_vlan,
5169         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5170         .ndo_get_vf_config      = be_get_vf_config,
5171         .ndo_set_vf_link_state  = be_set_vf_link_state,
5172         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5173         .ndo_tx_timeout         = be_tx_timeout,
5174 #ifdef CONFIG_NET_POLL_CONTROLLER
5175         .ndo_poll_controller    = be_netpoll,
5176 #endif
5177         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5178         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5179         .ndo_features_check     = be_features_check,
5180         .ndo_get_phys_port_id   = be_get_phys_port_id,
5181 };
5182
5183 static void be_netdev_init(struct net_device *netdev)
5184 {
5185         struct be_adapter *adapter = netdev_priv(netdev);
5186
5187         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5188                 NETIF_F_GSO_UDP_TUNNEL |
5189                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5190                 NETIF_F_HW_VLAN_CTAG_TX;
5191         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5192                 netdev->hw_features |= NETIF_F_RXHASH;
5193
5194         netdev->features |= netdev->hw_features |
5195                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER |
5196                 NETIF_F_HIGHDMA;
5197
5198         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5199                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5200
5201         netdev->priv_flags |= IFF_UNICAST_FLT;
5202
5203         netdev->flags |= IFF_MULTICAST;
5204
5205         netif_set_tso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5206
5207         netdev->netdev_ops = &be_netdev_ops;
5208
5209         netdev->ethtool_ops = &be_ethtool_ops;
5210
5211         if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5212                 netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5213
5214         /* MTU range: 256 - 9000 */
5215         netdev->min_mtu = BE_MIN_MTU;
5216         netdev->max_mtu = BE_MAX_MTU;
5217 }
5218
5219 static void be_cleanup(struct be_adapter *adapter)
5220 {
5221         struct net_device *netdev = adapter->netdev;
5222
5223         rtnl_lock();
5224         netif_device_detach(netdev);
5225         if (netif_running(netdev))
5226                 be_close(netdev);
5227         rtnl_unlock();
5228
5229         be_clear(adapter);
5230 }
5231
5232 static int be_resume(struct be_adapter *adapter)
5233 {
5234         struct net_device *netdev = adapter->netdev;
5235         int status;
5236
5237         status = be_setup(adapter);
5238         if (status)
5239                 return status;
5240
5241         rtnl_lock();
5242         if (netif_running(netdev))
5243                 status = be_open(netdev);
5244         rtnl_unlock();
5245
5246         if (status)
5247                 return status;
5248
5249         netif_device_attach(netdev);
5250
5251         return 0;
5252 }
5253
5254 static void be_soft_reset(struct be_adapter *adapter)
5255 {
5256         u32 val;
5257
5258         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5259         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5260         val |= SLIPORT_SOFTRESET_SR_MASK;
5261         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262 }
5263
5264 static bool be_err_is_recoverable(struct be_adapter *adapter)
5265 {
5266         struct be_error_recovery *err_rec = &adapter->error_recovery;
5267         unsigned long initial_idle_time =
5268                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5269         unsigned long recovery_interval =
5270                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5271         u16 ue_err_code;
5272         u32 val;
5273
5274         val = be_POST_stage_get(adapter);
5275         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5276                 return false;
5277         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5278         if (ue_err_code == 0)
5279                 return false;
5280
5281         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5282                 ue_err_code);
5283
5284         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5285                 dev_err(&adapter->pdev->dev,
5286                         "Cannot recover within %lu sec from driver load\n",
5287                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5288                 return false;
5289         }
5290
5291         if (err_rec->last_recovery_time && time_before_eq(
5292                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5293                 dev_err(&adapter->pdev->dev,
5294                         "Cannot recover within %lu sec from last recovery\n",
5295                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5296                 return false;
5297         }
5298
5299         if (ue_err_code == err_rec->last_err_code) {
5300                 dev_err(&adapter->pdev->dev,
5301                         "Cannot recover from a consecutive TPE error\n");
5302                 return false;
5303         }
5304
5305         err_rec->last_recovery_time = jiffies;
5306         err_rec->last_err_code = ue_err_code;
5307         return true;
5308 }
5309
5310 static int be_tpe_recover(struct be_adapter *adapter)
5311 {
5312         struct be_error_recovery *err_rec = &adapter->error_recovery;
5313         int status = -EAGAIN;
5314         u32 val;
5315
5316         switch (err_rec->recovery_state) {
5317         case ERR_RECOVERY_ST_NONE:
5318                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5319                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5320                 break;
5321
5322         case ERR_RECOVERY_ST_DETECT:
5323                 val = be_POST_stage_get(adapter);
5324                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5325                     POST_STAGE_RECOVERABLE_ERR) {
5326                         dev_err(&adapter->pdev->dev,
5327                                 "Unrecoverable HW error detected: 0x%x\n", val);
5328                         status = -EINVAL;
5329                         err_rec->resched_delay = 0;
5330                         break;
5331                 }
5332
5333                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5334
5335                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5336                  * milliseconds before it checks for final error status in
5337                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5338                  * If it does, then PF0 initiates a Soft Reset.
5339                  */
5340                 if (adapter->pf_num == 0) {
5341                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5342                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5343                                         ERR_RECOVERY_UE_DETECT_DURATION;
5344                         break;
5345                 }
5346
5347                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5348                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5349                                         ERR_RECOVERY_UE_DETECT_DURATION;
5350                 break;
5351
5352         case ERR_RECOVERY_ST_RESET:
5353                 if (!be_err_is_recoverable(adapter)) {
5354                         dev_err(&adapter->pdev->dev,
5355                                 "Failed to meet recovery criteria\n");
5356                         status = -EIO;
5357                         err_rec->resched_delay = 0;
5358                         break;
5359                 }
5360                 be_soft_reset(adapter);
5361                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5362                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5363                                         err_rec->ue_to_reset_time;
5364                 break;
5365
5366         case ERR_RECOVERY_ST_PRE_POLL:
5367                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5368                 err_rec->resched_delay = 0;
5369                 status = 0;                     /* done */
5370                 break;
5371
5372         default:
5373                 status = -EINVAL;
5374                 err_rec->resched_delay = 0;
5375                 break;
5376         }
5377
5378         return status;
5379 }
5380
5381 static int be_err_recover(struct be_adapter *adapter)
5382 {
5383         int status;
5384
5385         if (!lancer_chip(adapter)) {
5386                 if (!adapter->error_recovery.recovery_supported ||
5387                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5388                         return -EIO;
5389                 status = be_tpe_recover(adapter);
5390                 if (status)
5391                         goto err;
5392         }
5393
5394         /* Wait for adapter to reach quiescent state before
5395          * destroying queues
5396          */
5397         status = be_fw_wait_ready(adapter);
5398         if (status)
5399                 goto err;
5400
5401         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5402
5403         be_cleanup(adapter);
5404
5405         status = be_resume(adapter);
5406         if (status)
5407                 goto err;
5408
5409         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5410
5411 err:
5412         return status;
5413 }
5414
5415 static void be_err_detection_task(struct work_struct *work)
5416 {
5417         struct be_error_recovery *err_rec =
5418                         container_of(work, struct be_error_recovery,
5419                                      err_detection_work.work);
5420         struct be_adapter *adapter =
5421                         container_of(err_rec, struct be_adapter,
5422                                      error_recovery);
5423         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5424         struct device *dev = &adapter->pdev->dev;
5425         int recovery_status;
5426
5427         be_detect_error(adapter);
5428         if (!be_check_error(adapter, BE_ERROR_HW))
5429                 goto reschedule_task;
5430
5431         recovery_status = be_err_recover(adapter);
5432         if (!recovery_status) {
5433                 err_rec->recovery_retries = 0;
5434                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5435                 dev_info(dev, "Adapter recovery successful\n");
5436                 goto reschedule_task;
5437         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5438                 /* BEx/SH recovery state machine */
5439                 if (adapter->pf_num == 0 &&
5440                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5441                         dev_err(&adapter->pdev->dev,
5442                                 "Adapter recovery in progress\n");
5443                 resched_delay = err_rec->resched_delay;
5444                 goto reschedule_task;
5445         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5446                 /* For VFs, check if PF have allocated resources
5447                  * every second.
5448                  */
5449                 dev_err(dev, "Re-trying adapter recovery\n");
5450                 goto reschedule_task;
5451         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5452                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5453                 /* In case of another error during recovery, it takes 30 sec
5454                  * for adapter to come out of error. Retry error recovery after
5455                  * this time interval.
5456                  */
5457                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5458                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5459                 goto reschedule_task;
5460         } else {
5461                 dev_err(dev, "Adapter recovery failed\n");
5462                 dev_err(dev, "Please reboot server to recover\n");
5463         }
5464
5465         return;
5466
5467 reschedule_task:
5468         be_schedule_err_detection(adapter, resched_delay);
5469 }
5470
5471 static void be_log_sfp_info(struct be_adapter *adapter)
5472 {
5473         int status;
5474
5475         status = be_cmd_query_sfp_info(adapter);
5476         if (!status) {
5477                 dev_err(&adapter->pdev->dev,
5478                         "Port %c: %s Vendor: %s part no: %s",
5479                         adapter->port_name,
5480                         be_misconfig_evt_port_state[adapter->phy_state],
5481                         adapter->phy.vendor_name,
5482                         adapter->phy.vendor_pn);
5483         }
5484         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5485 }
5486
5487 static void be_worker(struct work_struct *work)
5488 {
5489         struct be_adapter *adapter =
5490                 container_of(work, struct be_adapter, work.work);
5491         struct be_rx_obj *rxo;
5492         int i;
5493
5494         if (be_physfn(adapter) &&
5495             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5496                 be_cmd_get_die_temperature(adapter);
5497
5498         /* when interrupts are not yet enabled, just reap any pending
5499          * mcc completions
5500          */
5501         if (!netif_running(adapter->netdev)) {
5502                 local_bh_disable();
5503                 be_process_mcc(adapter);
5504                 local_bh_enable();
5505                 goto reschedule;
5506         }
5507
5508         if (!adapter->stats_cmd_sent) {
5509                 if (lancer_chip(adapter))
5510                         lancer_cmd_get_pport_stats(adapter,
5511                                                    &adapter->stats_cmd);
5512                 else
5513                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5514         }
5515
5516         for_all_rx_queues(adapter, rxo, i) {
5517                 /* Replenish RX-queues starved due to memory
5518                  * allocation failures.
5519                  */
5520                 if (rxo->rx_post_starved)
5521                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5522         }
5523
5524         /* EQ-delay update for Skyhawk is done while notifying EQ */
5525         if (!skyhawk_chip(adapter))
5526                 be_eqd_update(adapter, false);
5527
5528         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5529                 be_log_sfp_info(adapter);
5530
5531 reschedule:
5532         adapter->work_counter++;
5533         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5534 }
5535
5536 static void be_unmap_pci_bars(struct be_adapter *adapter)
5537 {
5538         if (adapter->csr)
5539                 pci_iounmap(adapter->pdev, adapter->csr);
5540         if (adapter->db)
5541                 pci_iounmap(adapter->pdev, adapter->db);
5542         if (adapter->pcicfg && adapter->pcicfg_mapped)
5543                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5544 }
5545
5546 static int db_bar(struct be_adapter *adapter)
5547 {
5548         if (lancer_chip(adapter) || be_virtfn(adapter))
5549                 return 0;
5550         else
5551                 return 4;
5552 }
5553
5554 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5555 {
5556         if (skyhawk_chip(adapter)) {
5557                 adapter->roce_db.size = 4096;
5558                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5559                                                               db_bar(adapter));
5560                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5561                                                                db_bar(adapter));
5562         }
5563         return 0;
5564 }
5565
5566 static int be_map_pci_bars(struct be_adapter *adapter)
5567 {
5568         struct pci_dev *pdev = adapter->pdev;
5569         u8 __iomem *addr;
5570         u32 sli_intf;
5571
5572         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5573         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5574                                 SLI_INTF_FAMILY_SHIFT;
5575         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5576
5577         if (BEx_chip(adapter) && be_physfn(adapter)) {
5578                 adapter->csr = pci_iomap(pdev, 2, 0);
5579                 if (!adapter->csr)
5580                         return -ENOMEM;
5581         }
5582
5583         addr = pci_iomap(pdev, db_bar(adapter), 0);
5584         if (!addr)
5585                 goto pci_map_err;
5586         adapter->db = addr;
5587
5588         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5589                 if (be_physfn(adapter)) {
5590                         /* PCICFG is the 2nd BAR in BE2 */
5591                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5592                         if (!addr)
5593                                 goto pci_map_err;
5594                         adapter->pcicfg = addr;
5595                         adapter->pcicfg_mapped = true;
5596                 } else {
5597                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5598                         adapter->pcicfg_mapped = false;
5599                 }
5600         }
5601
5602         be_roce_map_pci_bars(adapter);
5603         return 0;
5604
5605 pci_map_err:
5606         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5607         be_unmap_pci_bars(adapter);
5608         return -ENOMEM;
5609 }
5610
5611 static void be_drv_cleanup(struct be_adapter *adapter)
5612 {
5613         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5614         struct device *dev = &adapter->pdev->dev;
5615
5616         if (mem->va)
5617                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5618
5619         mem = &adapter->rx_filter;
5620         if (mem->va)
5621                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5622
5623         mem = &adapter->stats_cmd;
5624         if (mem->va)
5625                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5626 }
5627
5628 /* Allocate and initialize various fields in be_adapter struct */
5629 static int be_drv_init(struct be_adapter *adapter)
5630 {
5631         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5632         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5633         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5634         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5635         struct device *dev = &adapter->pdev->dev;
5636         int status = 0;
5637
5638         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5639         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5640                                                 &mbox_mem_alloc->dma,
5641                                                 GFP_KERNEL);
5642         if (!mbox_mem_alloc->va)
5643                 return -ENOMEM;
5644
5645         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5646         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5647         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5648
5649         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5650         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5651                                            &rx_filter->dma, GFP_KERNEL);
5652         if (!rx_filter->va) {
5653                 status = -ENOMEM;
5654                 goto free_mbox;
5655         }
5656
5657         if (lancer_chip(adapter))
5658                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5659         else if (BE2_chip(adapter))
5660                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5661         else if (BE3_chip(adapter))
5662                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5663         else
5664                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5665         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5666                                            &stats_cmd->dma, GFP_KERNEL);
5667         if (!stats_cmd->va) {
5668                 status = -ENOMEM;
5669                 goto free_rx_filter;
5670         }
5671
5672         mutex_init(&adapter->mbox_lock);
5673         mutex_init(&adapter->mcc_lock);
5674         mutex_init(&adapter->rx_filter_lock);
5675         spin_lock_init(&adapter->mcc_cq_lock);
5676         init_completion(&adapter->et_cmd_compl);
5677
5678         pci_save_state(adapter->pdev);
5679
5680         INIT_DELAYED_WORK(&adapter->work, be_worker);
5681
5682         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5683         adapter->error_recovery.resched_delay = 0;
5684         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5685                           be_err_detection_task);
5686
5687         adapter->rx_fc = true;
5688         adapter->tx_fc = true;
5689
5690         /* Must be a power of 2 or else MODULO will BUG_ON */
5691         adapter->be_get_temp_freq = 64;
5692
5693         return 0;
5694
5695 free_rx_filter:
5696         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5697 free_mbox:
5698         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5699                           mbox_mem_alloc->dma);
5700         return status;
5701 }
5702
5703 static void be_remove(struct pci_dev *pdev)
5704 {
5705         struct be_adapter *adapter = pci_get_drvdata(pdev);
5706
5707         if (!adapter)
5708                 return;
5709
5710         be_roce_dev_remove(adapter);
5711         be_intr_set(adapter, false);
5712
5713         be_cancel_err_detection(adapter);
5714
5715         unregister_netdev(adapter->netdev);
5716
5717         be_clear(adapter);
5718
5719         if (!pci_vfs_assigned(adapter->pdev))
5720                 be_cmd_reset_function(adapter);
5721
5722         /* tell fw we're done with firing cmds */
5723         be_cmd_fw_clean(adapter);
5724
5725         be_unmap_pci_bars(adapter);
5726         be_drv_cleanup(adapter);
5727
5728         pci_release_regions(pdev);
5729         pci_disable_device(pdev);
5730
5731         free_netdev(adapter->netdev);
5732 }
5733
5734 static ssize_t be_hwmon_show_temp(struct device *dev,
5735                                   struct device_attribute *dev_attr,
5736                                   char *buf)
5737 {
5738         struct be_adapter *adapter = dev_get_drvdata(dev);
5739
5740         /* Unit: millidegree Celsius */
5741         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5742                 return -EIO;
5743         else
5744                 return sprintf(buf, "%u\n",
5745                                adapter->hwmon_info.be_on_die_temp * 1000);
5746 }
5747
5748 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5749                           be_hwmon_show_temp, NULL, 1);
5750
5751 static struct attribute *be_hwmon_attrs[] = {
5752         &sensor_dev_attr_temp1_input.dev_attr.attr,
5753         NULL
5754 };
5755
5756 ATTRIBUTE_GROUPS(be_hwmon);
5757
5758 static char *mc_name(struct be_adapter *adapter)
5759 {
5760         char *str = ""; /* default */
5761
5762         switch (adapter->mc_type) {
5763         case UMC:
5764                 str = "UMC";
5765                 break;
5766         case FLEX10:
5767                 str = "FLEX10";
5768                 break;
5769         case vNIC1:
5770                 str = "vNIC-1";
5771                 break;
5772         case nPAR:
5773                 str = "nPAR";
5774                 break;
5775         case UFP:
5776                 str = "UFP";
5777                 break;
5778         case vNIC2:
5779                 str = "vNIC-2";
5780                 break;
5781         default:
5782                 str = "";
5783         }
5784
5785         return str;
5786 }
5787
5788 static inline char *func_name(struct be_adapter *adapter)
5789 {
5790         return be_physfn(adapter) ? "PF" : "VF";
5791 }
5792
5793 static inline char *nic_name(struct pci_dev *pdev)
5794 {
5795         switch (pdev->device) {
5796         case OC_DEVICE_ID1:
5797                 return OC_NAME;
5798         case OC_DEVICE_ID2:
5799                 return OC_NAME_BE;
5800         case OC_DEVICE_ID3:
5801         case OC_DEVICE_ID4:
5802                 return OC_NAME_LANCER;
5803         case BE_DEVICE_ID2:
5804                 return BE3_NAME;
5805         case OC_DEVICE_ID5:
5806         case OC_DEVICE_ID6:
5807                 return OC_NAME_SH;
5808         default:
5809                 return BE_NAME;
5810         }
5811 }
5812
5813 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5814 {
5815         struct be_adapter *adapter;
5816         struct net_device *netdev;
5817         int status = 0;
5818
5819         status = pci_enable_device(pdev);
5820         if (status)
5821                 goto do_none;
5822
5823         status = pci_request_regions(pdev, DRV_NAME);
5824         if (status)
5825                 goto disable_dev;
5826         pci_set_master(pdev);
5827
5828         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5829         if (!netdev) {
5830                 status = -ENOMEM;
5831                 goto rel_reg;
5832         }
5833         adapter = netdev_priv(netdev);
5834         adapter->pdev = pdev;
5835         pci_set_drvdata(pdev, adapter);
5836         adapter->netdev = netdev;
5837         SET_NETDEV_DEV(netdev, &pdev->dev);
5838
5839         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5840         if (status) {
5841                 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5842                 goto free_netdev;
5843         }
5844
5845         status = be_map_pci_bars(adapter);
5846         if (status)
5847                 goto free_netdev;
5848
5849         status = be_drv_init(adapter);
5850         if (status)
5851                 goto unmap_bars;
5852
5853         status = be_setup(adapter);
5854         if (status)
5855                 goto drv_cleanup;
5856
5857         be_netdev_init(netdev);
5858         status = register_netdev(netdev);
5859         if (status != 0)
5860                 goto unsetup;
5861
5862         be_roce_dev_add(adapter);
5863
5864         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5865         adapter->error_recovery.probe_time = jiffies;
5866
5867         /* On Die temperature not supported for VF. */
5868         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5869                 adapter->hwmon_info.hwmon_dev =
5870                         devm_hwmon_device_register_with_groups(&pdev->dev,
5871                                                                DRV_NAME,
5872                                                                adapter,
5873                                                                be_hwmon_groups);
5874                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5875         }
5876
5877         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5878                  func_name(adapter), mc_name(adapter), adapter->port_name);
5879
5880         return 0;
5881
5882 unsetup:
5883         be_clear(adapter);
5884 drv_cleanup:
5885         be_drv_cleanup(adapter);
5886 unmap_bars:
5887         be_unmap_pci_bars(adapter);
5888 free_netdev:
5889         free_netdev(netdev);
5890 rel_reg:
5891         pci_release_regions(pdev);
5892 disable_dev:
5893         pci_disable_device(pdev);
5894 do_none:
5895         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5896         return status;
5897 }
5898
5899 static int __maybe_unused be_suspend(struct device *dev_d)
5900 {
5901         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5902
5903         be_intr_set(adapter, false);
5904         be_cancel_err_detection(adapter);
5905
5906         be_cleanup(adapter);
5907
5908         return 0;
5909 }
5910
5911 static int __maybe_unused be_pci_resume(struct device *dev_d)
5912 {
5913         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5914         int status = 0;
5915
5916         status = be_resume(adapter);
5917         if (status)
5918                 return status;
5919
5920         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5921
5922         return 0;
5923 }
5924
5925 /*
5926  * An FLR will stop BE from DMAing any data.
5927  */
5928 static void be_shutdown(struct pci_dev *pdev)
5929 {
5930         struct be_adapter *adapter = pci_get_drvdata(pdev);
5931
5932         if (!adapter)
5933                 return;
5934
5935         be_roce_dev_shutdown(adapter);
5936         cancel_delayed_work_sync(&adapter->work);
5937         be_cancel_err_detection(adapter);
5938
5939         netif_device_detach(adapter->netdev);
5940
5941         be_cmd_reset_function(adapter);
5942
5943         pci_disable_device(pdev);
5944 }
5945
5946 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5947                                             pci_channel_state_t state)
5948 {
5949         struct be_adapter *adapter = pci_get_drvdata(pdev);
5950
5951         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5952
5953         be_roce_dev_remove(adapter);
5954
5955         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5956                 be_set_error(adapter, BE_ERROR_EEH);
5957
5958                 be_cancel_err_detection(adapter);
5959
5960                 be_cleanup(adapter);
5961         }
5962
5963         if (state == pci_channel_io_perm_failure)
5964                 return PCI_ERS_RESULT_DISCONNECT;
5965
5966         pci_disable_device(pdev);
5967
5968         /* The error could cause the FW to trigger a flash debug dump.
5969          * Resetting the card while flash dump is in progress
5970          * can cause it not to recover; wait for it to finish.
5971          * Wait only for first function as it is needed only once per
5972          * adapter.
5973          */
5974         if (pdev->devfn == 0)
5975                 ssleep(30);
5976
5977         return PCI_ERS_RESULT_NEED_RESET;
5978 }
5979
5980 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5981 {
5982         struct be_adapter *adapter = pci_get_drvdata(pdev);
5983         int status;
5984
5985         dev_info(&adapter->pdev->dev, "EEH reset\n");
5986
5987         status = pci_enable_device(pdev);
5988         if (status)
5989                 return PCI_ERS_RESULT_DISCONNECT;
5990
5991         pci_set_master(pdev);
5992         pci_restore_state(pdev);
5993
5994         /* Check if card is ok and fw is ready */
5995         dev_info(&adapter->pdev->dev,
5996                  "Waiting for FW to be ready after EEH reset\n");
5997         status = be_fw_wait_ready(adapter);
5998         if (status)
5999                 return PCI_ERS_RESULT_DISCONNECT;
6000
6001         be_clear_error(adapter, BE_CLEAR_ALL);
6002         return PCI_ERS_RESULT_RECOVERED;
6003 }
6004
6005 static void be_eeh_resume(struct pci_dev *pdev)
6006 {
6007         int status = 0;
6008         struct be_adapter *adapter = pci_get_drvdata(pdev);
6009
6010         dev_info(&adapter->pdev->dev, "EEH resume\n");
6011
6012         pci_save_state(pdev);
6013
6014         status = be_resume(adapter);
6015         if (status)
6016                 goto err;
6017
6018         be_roce_dev_add(adapter);
6019
6020         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6021         return;
6022 err:
6023         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6024 }
6025
6026 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6027 {
6028         struct be_adapter *adapter = pci_get_drvdata(pdev);
6029         struct be_resources vft_res = {0};
6030         int status;
6031
6032         if (!num_vfs)
6033                 be_vf_clear(adapter);
6034
6035         adapter->num_vfs = num_vfs;
6036
6037         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6038                 dev_warn(&pdev->dev,
6039                          "Cannot disable VFs while they are assigned\n");
6040                 return -EBUSY;
6041         }
6042
6043         /* When the HW is in SRIOV capable configuration, the PF-pool resources
6044          * are equally distributed across the max-number of VFs. The user may
6045          * request only a subset of the max-vfs to be enabled.
6046          * Based on num_vfs, redistribute the resources across num_vfs so that
6047          * each VF will have access to more number of resources.
6048          * This facility is not available in BE3 FW.
6049          * Also, this is done by FW in Lancer chip.
6050          */
6051         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6052                 be_calculate_vf_res(adapter, adapter->num_vfs,
6053                                     &vft_res);
6054                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6055                                                  adapter->num_vfs, &vft_res);
6056                 if (status)
6057                         dev_err(&pdev->dev,
6058                                 "Failed to optimize SR-IOV resources\n");
6059         }
6060
6061         status = be_get_resources(adapter);
6062         if (status)
6063                 return be_cmd_status(status);
6064
6065         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6066         rtnl_lock();
6067         status = be_update_queues(adapter);
6068         rtnl_unlock();
6069         if (status)
6070                 return be_cmd_status(status);
6071
6072         if (adapter->num_vfs)
6073                 status = be_vf_setup(adapter);
6074
6075         if (!status)
6076                 return adapter->num_vfs;
6077
6078         return 0;
6079 }
6080
6081 static const struct pci_error_handlers be_eeh_handlers = {
6082         .error_detected = be_eeh_err_detected,
6083         .slot_reset = be_eeh_reset,
6084         .resume = be_eeh_resume,
6085 };
6086
6087 static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6088
6089 static struct pci_driver be_driver = {
6090         .name = DRV_NAME,
6091         .id_table = be_dev_ids,
6092         .probe = be_probe,
6093         .remove = be_remove,
6094         .driver.pm = &be_pci_pm_ops,
6095         .shutdown = be_shutdown,
6096         .sriov_configure = be_pci_sriov_configure,
6097         .err_handler = &be_eeh_handlers
6098 };
6099
6100 static int __init be_init_module(void)
6101 {
6102         int status;
6103
6104         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6105             rx_frag_size != 2048) {
6106                 printk(KERN_WARNING DRV_NAME
6107                         " : Module param rx_frag_size must be 2048/4096/8192."
6108                         " Using 2048\n");
6109                 rx_frag_size = 2048;
6110         }
6111
6112         if (num_vfs > 0) {
6113                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6114                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6115         }
6116
6117         be_wq = create_singlethread_workqueue("be_wq");
6118         if (!be_wq) {
6119                 pr_warn(DRV_NAME "workqueue creation failed\n");
6120                 return -1;
6121         }
6122
6123         be_err_recovery_workq =
6124                 create_singlethread_workqueue("be_err_recover");
6125         if (!be_err_recovery_workq)
6126                 pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6127
6128         status = pci_register_driver(&be_driver);
6129         if (status) {
6130                 destroy_workqueue(be_wq);
6131                 be_destroy_err_recovery_workq();
6132         }
6133         return status;
6134 }
6135 module_init(be_init_module);
6136
6137 static void __exit be_exit_module(void)
6138 {
6139         pci_unregister_driver(&be_driver);
6140
6141         be_destroy_err_recovery_workq();
6142
6143         if (be_wq)
6144                 destroy_workqueue(be_wq);
6145 }
6146 module_exit(be_exit_module);