GNU Linux-libre 4.9.292-gnu1
[releases.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
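/* Free the DMA-coherent memory backing a queue's ring. Safe to call even if
 * the queue was never allocated: mem->va is checked before freeing and
 * cleared afterwards.
 */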
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
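/* Enable/disable host interrupts by toggling the HOSTINTR bit of the MEMBAR
 * interrupt-control register in PCI config space. The register is written
 * only when the requested state differs from the current one.
 */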
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
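/* Ring the RX doorbell: the queue id and the number of newly posted receive
 * buffers are packed into a single 32-bit doorbell write. The wmb() orders
 * the ring updates ahead of the doorbell write.
 */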
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
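/* Accumulate a 16-bit HW counter into a 32-bit driver counter, accounting for
 * wrap-around. For example, if the low 16 bits of *acc are 0xFFF0 and the HW
 * now reports 0x0005, the counter has wrapped, so an extra 65536 is added on
 * top of the new reading.
 */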
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* the erx HW counter below can actually wrap around after
618                  * 65535. The driver accumulates it into a 32-bit value
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
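/* ndo_get_stats64 handler: sums the per-queue RX/TX packet and byte counters
 * (read under u64_stats sync for consistent 64-bit values) and derives the
 * rx_errors/rx_fifo_errors breakdown from the driver-maintained HW stats.
 */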
650 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
651                                                 struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715         return stats;
716 }
717
718 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
719 {
720         struct net_device *netdev = adapter->netdev;
721
722         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
723                 netif_carrier_off(netdev);
724                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
725         }
726
727         if (link_status)
728                 netif_carrier_on(netdev);
729         else
730                 netif_carrier_off(netdev);
731
732         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
733 }
734
735 static int be_gso_hdr_len(struct sk_buff *skb)
736 {
737         if (skb->encapsulation)
738                 return skb_inner_transport_offset(skb) +
739                        inner_tcp_hdrlen(skb);
740         return skb_transport_offset(skb) + tcp_hdrlen(skb);
741 }
742
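/* Update per-TX-queue stats for one submitted skb. For TSO, the headers that
 * get replicated for every segment are accounted in tx_bytes as well.
 */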
743 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
744 {
745         struct be_tx_stats *stats = tx_stats(txo);
746         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
747         /* Account for headers which get duplicated in TSO pkt */
748         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
749
750         u64_stats_update_begin(&stats->sync);
751         stats->tx_reqs++;
752         stats->tx_bytes += skb->len + dup_hdr_len;
753         stats->tx_pkts += tx_pkts;
754         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
755                 stats->tx_vxlan_offload_pkts += tx_pkts;
756         u64_stats_update_end(&stats->sync);
757 }
758
759 /* Returns number of WRBs needed for the skb */
760 static u32 skb_wrb_cnt(struct sk_buff *skb)
761 {
762         /* +1 for the header wrb */
763         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
764 }
765
766 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
767 {
768         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
769         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
770         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
771         wrb->rsvd0 = 0;
772 }
773
774 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
775  * to avoid the swap and shift/mask operations in wrb_fill().
776  */
777 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
778 {
779         wrb->frag_pa_hi = 0;
780         wrb->frag_pa_lo = 0;
781         wrb->frag_len = 0;
782         wrb->rsvd0 = 0;
783 }
784
785 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
786                                      struct sk_buff *skb)
787 {
788         u8 vlan_prio;
789         u16 vlan_tag;
790
791         vlan_tag = skb_vlan_tag_get(skb);
792         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
793         /* If vlan priority provided by OS is NOT in available bmap */
794         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
795                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
796                                 adapter->recommended_prio_bits;
797
798         return vlan_tag;
799 }
800
801 /* Used only for IP tunnel packets */
802 static u16 skb_inner_ip_proto(struct sk_buff *skb)
803 {
804         return (inner_ip_hdr(skb)->version == 4) ?
805                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
806 }
807
808 static u16 skb_ip_proto(struct sk_buff *skb)
809 {
810         return (ip_hdr(skb)->version == 4) ?
811                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
812 }
813
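/* TX queue occupancy helpers: the queue is reported full when it may not have
 * room for another maximally fragmented packet (BE_MAX_TX_FRAG_COUNT WRBs),
 * and is considered wakeable only after it drains below half its length.
 */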
814 static inline bool be_is_txq_full(struct be_tx_obj *txo)
815 {
816         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
817 }
818
819 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
820 {
821         return atomic_read(&txo->q.used) < txo->q.len / 2;
822 }
823
824 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
825 {
826         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
827 }
828
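/* Translate the skb's offload requests (TSO/GSO, inner or outer L4 checksum,
 * VLAN tag) into the feature bits and fields carried in be_wrb_params.
 */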
829 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
830                                        struct sk_buff *skb,
831                                        struct be_wrb_params *wrb_params)
832 {
833         u16 proto;
834
835         if (skb_is_gso(skb)) {
836                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
837                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
838                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
839                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
840         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
841                 if (skb->encapsulation) {
842                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
843                         proto = skb_inner_ip_proto(skb);
844                 } else {
845                         proto = skb_ip_proto(skb);
846                 }
847                 if (proto == IPPROTO_TCP)
848                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
849                 else if (proto == IPPROTO_UDP)
850                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
851         }
852
853         if (skb_vlan_tag_present(skb)) {
854                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
855                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
856         }
857
858         BE_WRB_F_SET(wrb_params->features, CRC, 1);
859 }
860
861 static void wrb_fill_hdr(struct be_adapter *adapter,
862                          struct be_eth_hdr_wrb *hdr,
863                          struct be_wrb_params *wrb_params,
864                          struct sk_buff *skb)
865 {
866         memset(hdr, 0, sizeof(*hdr));
867
868         SET_TX_WRB_HDR_BITS(crc, hdr,
869                             BE_WRB_F_GET(wrb_params->features, CRC));
870         SET_TX_WRB_HDR_BITS(ipcs, hdr,
871                             BE_WRB_F_GET(wrb_params->features, IPCS));
872         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, TCPCS));
874         SET_TX_WRB_HDR_BITS(udpcs, hdr,
875                             BE_WRB_F_GET(wrb_params->features, UDPCS));
876
877         SET_TX_WRB_HDR_BITS(lso, hdr,
878                             BE_WRB_F_GET(wrb_params->features, LSO));
879         SET_TX_WRB_HDR_BITS(lso6, hdr,
880                             BE_WRB_F_GET(wrb_params->features, LSO6));
881         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
882
883         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
884          * hack is not needed, the evt bit is set while ringing DB.
885          */
886         SET_TX_WRB_HDR_BITS(event, hdr,
887                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
888         SET_TX_WRB_HDR_BITS(vlan, hdr,
889                             BE_WRB_F_GET(wrb_params->features, VLAN));
890         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
891
892         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
893         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
894         SET_TX_WRB_HDR_BITS(mgmt, hdr,
895                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
896 }
897
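/* Undo the DMA mapping recorded in a data WRB. The first fragment of a packet
 * (the linear skb data) is mapped with dma_map_single(), the rest with
 * skb_frag_dma_map(), hence the unmap_single flag.
 */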
898 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
899                           bool unmap_single)
900 {
901         dma_addr_t dma;
902         u32 frag_len = le32_to_cpu(wrb->frag_len);
903
904
905         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
906                 (u64)le32_to_cpu(wrb->frag_pa_lo);
907         if (frag_len) {
908                 if (unmap_single)
909                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
910                 else
911                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
912         }
913 }
914
915 /* Grab a WRB header for xmit */
916 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
917 {
918         u32 head = txo->q.head;
919
920         queue_head_inc(&txo->q);
921         return head;
922 }
923
924 /* Set up the WRB header for xmit */
925 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
926                                 struct be_tx_obj *txo,
927                                 struct be_wrb_params *wrb_params,
928                                 struct sk_buff *skb, u16 head)
929 {
930         u32 num_frags = skb_wrb_cnt(skb);
931         struct be_queue_info *txq = &txo->q;
932         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
933
934         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
935         be_dws_cpu_to_le(hdr, sizeof(*hdr));
936
937         BUG_ON(txo->sent_skb_list[head]);
938         txo->sent_skb_list[head] = skb;
939         txo->last_req_hdr = head;
940         atomic_add(num_frags, &txq->used);
941         txo->last_req_wrb_cnt = num_frags;
942         txo->pend_wrb_cnt += num_frags;
943 }
944
945 /* Setup a WRB fragment (buffer descriptor) for xmit */
946 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
947                                  int len)
948 {
949         struct be_eth_wrb *wrb;
950         struct be_queue_info *txq = &txo->q;
951
952         wrb = queue_head_node(txq);
953         wrb_fill(wrb, busaddr, len);
954         queue_head_inc(txq);
955 }
956
957 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
958  * was invoked. The producer index is restored to the previous packet and the
959  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
960  */
961 static void be_xmit_restore(struct be_adapter *adapter,
962                             struct be_tx_obj *txo, u32 head, bool map_single,
963                             u32 copied)
964 {
965         struct device *dev;
966         struct be_eth_wrb *wrb;
967         struct be_queue_info *txq = &txo->q;
968
969         dev = &adapter->pdev->dev;
970         txq->head = head;
971
972         /* skip the first wrb (hdr); it's not mapped */
973         queue_head_inc(txq);
974         while (copied) {
975                 wrb = queue_head_node(txq);
976                 unmap_tx_frag(dev, wrb, map_single);
977                 map_single = false;
978                 copied -= le32_to_cpu(wrb->frag_len);
979                 queue_head_inc(txq);
980         }
981
982         txq->head = head;
983 }
984
985 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
986  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
987  * of WRBs used up by the packet.
988  */
989 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
990                            struct sk_buff *skb,
991                            struct be_wrb_params *wrb_params)
992 {
993         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
994         struct device *dev = &adapter->pdev->dev;
995         struct be_queue_info *txq = &txo->q;
996         bool map_single = false;
997         u32 head = txq->head;
998         dma_addr_t busaddr;
999         int len;
1000
1001         head = be_tx_get_wrb_hdr(txo);
1002
1003         if (skb->len > skb->data_len) {
1004                 len = skb_headlen(skb);
1005
1006                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1007                 if (dma_mapping_error(dev, busaddr))
1008                         goto dma_err;
1009                 map_single = true;
1010                 be_tx_setup_wrb_frag(txo, busaddr, len);
1011                 copied += len;
1012         }
1013
1014         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1015                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1016                 len = skb_frag_size(frag);
1017
1018                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1019                 if (dma_mapping_error(dev, busaddr))
1020                         goto dma_err;
1021                 be_tx_setup_wrb_frag(txo, busaddr, len);
1022                 copied += len;
1023         }
1024
1025         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1026
1027         be_tx_stats_update(txo, skb);
1028         return wrb_cnt;
1029
1030 dma_err:
1031         adapter->drv_stats.dma_map_errors++;
1032         be_xmit_restore(adapter, txo, head, map_single, copied);
1033         return 0;
1034 }
1035
1036 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1037 {
1038         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1039 }
1040
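/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * data in software instead of relying on HW tagging. Called from the xmit
 * workaround path and the OS2BMC path.
 */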
1041 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1042                                              struct sk_buff *skb,
1043                                              struct be_wrb_params
1044                                              *wrb_params)
1045 {
1046         u16 vlan_tag = 0;
1047
1048         skb = skb_share_check(skb, GFP_ATOMIC);
1049         if (unlikely(!skb))
1050                 return skb;
1051
1052         if (skb_vlan_tag_present(skb))
1053                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1054
1055         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1056                 if (!vlan_tag)
1057                         vlan_tag = adapter->pvid;
1058                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1059                  * to skip VLAN insertion
1060                  */
1061                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062         }
1063
1064         if (vlan_tag) {
1065                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1066                                                 vlan_tag);
1067                 if (unlikely(!skb))
1068                         return skb;
1069                 skb->vlan_tci = 0;
1070         }
1071
1072         /* Insert the outer VLAN, if any */
1073         if (adapter->qnq_vid) {
1074                 vlan_tag = adapter->qnq_vid;
1075                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1076                                                 vlan_tag);
1077                 if (unlikely(!skb))
1078                         return skb;
1079                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1080         }
1081
1082         return skb;
1083 }
1084
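/* Returns true for IPv6 packets carrying an extension header whose length
 * byte is 0xff; such packets need the BE3 TX stall workaround below.
 */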
1085 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1086 {
1087         struct ethhdr *eh = (struct ethhdr *)skb->data;
1088         u16 offset = ETH_HLEN;
1089
1090         if (eh->h_proto == htons(ETH_P_IPV6)) {
1091                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1092
1093                 offset += sizeof(struct ipv6hdr);
1094                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1095                     ip6h->nexthdr != NEXTHDR_UDP) {
1096                         struct ipv6_opt_hdr *ehdr =
1097                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1098
1099                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1100                         if (ehdr->hdrlen == 0xff)
1101                                 return true;
1102                 }
1103         }
1104         return false;
1105 }
1106
1107 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1108 {
1109         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1110 }
1111
1112 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1113 {
1114         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1115 }
1116
1117 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1118                                                   struct sk_buff *skb,
1119                                                   struct be_wrb_params
1120                                                   *wrb_params)
1121 {
1122         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1123         unsigned int eth_hdr_len;
1124         struct iphdr *ip;
1125
1126         /* For padded packets, BE HW modifies tot_len field in IP header
1127          * incorrectly when the VLAN tag is inserted by HW.
1128          * For padded packets, Lancer computes incorrect checksum.
1129          */
1130         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1131                                                 VLAN_ETH_HLEN : ETH_HLEN;
1132         if (skb->len <= 60 &&
1133             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1134             is_ipv4_pkt(skb)) {
1135                 ip = (struct iphdr *)ip_hdr(skb);
1136                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1137         }
1138
1139         /* If vlan tag is already inlined in the packet, skip HW VLAN
1140          * tagging in pvid-tagging mode
1141          */
1142         if (be_pvid_tagging_enabled(adapter) &&
1143             veh->h_vlan_proto == htons(ETH_P_8021Q))
1144                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1145
1146         /* HW has a bug wherein it will calculate CSUM for VLAN
1147          * pkts even though CSUM offload is not requested.
1148          * Manually insert the VLAN in the pkt instead.
1149          */
1150         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1151             skb_vlan_tag_present(skb)) {
1152                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1153                 if (unlikely(!skb))
1154                         goto err;
1155         }
1156
1157         /* HW may lockup when VLAN HW tagging is requested on
1158          * certain ipv6 packets. Drop such pkts if the HW workaround to
1159          * skip HW tagging is not enabled by FW.
1160          */
1161         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1162                      (adapter->pvid || adapter->qnq_vid) &&
1163                      !qnq_async_evt_rcvd(adapter)))
1164                 goto tx_drop;
1165
1166         /* Manual VLAN tag insertion to prevent:
1167          * ASIC lockup when the ASIC inserts VLAN tag into
1168          * certain ipv6 packets. Insert VLAN tags in driver,
1169          * and set event, completion, vlan bits accordingly
1170          * in the Tx WRB.
1171          */
1172         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1173             be_vlan_tag_tx_chk(adapter, skb)) {
1174                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1175                 if (unlikely(!skb))
1176                         goto err;
1177         }
1178
1179         return skb;
1180 tx_drop:
1181         dev_kfree_skb_any(skb);
1182 err:
1183         return NULL;
1184 }
1185
1186 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1187                                            struct sk_buff *skb,
1188                                            struct be_wrb_params *wrb_params)
1189 {
1190         int err;
1191
1192         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1193          * packets that are 32 bytes or less may cause a transmit stall
1194          * on that port. The workaround is to pad such packets
1195          * (len <= 32 bytes) to a minimum length of 36 bytes.
1196          */
1197         if (skb->len <= 32) {
1198                 if (skb_put_padto(skb, 36))
1199                         return NULL;
1200         }
1201
1202         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1203                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1204                 if (!skb)
1205                         return NULL;
1206         }
1207
1208         /* The stack can send us skbs with length greater than
1209          * what the HW can handle. Trim the extra bytes.
1210          */
1211         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1212         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1213         WARN_ON(err);
1214
1215         return skb;
1216 }
1217
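/* Notify the HW of all WRBs queued since the last doorbell. On non-Lancer
 * chips a dummy WRB is appended first when the pending count is odd.
 */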
1218 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1219 {
1220         struct be_queue_info *txq = &txo->q;
1221         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1222
1223         /* Mark the last request eventable if it hasn't been marked already */
1224         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1225                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1226
1227         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1228         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1229                 wrb_fill_dummy(queue_head_node(txq));
1230                 queue_head_inc(txq);
1231                 atomic_inc(&txq->used);
1232                 txo->pend_wrb_cnt++;
1233                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1234                                            TX_HDR_WRB_NUM_SHIFT);
1235                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1236                                           TX_HDR_WRB_NUM_SHIFT);
1237         }
1238         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1239         txo->pend_wrb_cnt = 0;
1240 }
1241
1242 /* OS2BMC related */
1243
1244 #define DHCP_CLIENT_PORT        68
1245 #define DHCP_SERVER_PORT        67
1246 #define NET_BIOS_PORT1          137
1247 #define NET_BIOS_PORT2          138
1248 #define DHCPV6_RAS_PORT         547
1249
1250 #define is_mc_allowed_on_bmc(adapter, eh)       \
1251         (!is_multicast_filt_enabled(adapter) && \
1252          is_multicast_ether_addr(eh->h_dest) && \
1253          !is_broadcast_ether_addr(eh->h_dest))
1254
1255 #define is_bc_allowed_on_bmc(adapter, eh)       \
1256         (!is_broadcast_filt_enabled(adapter) && \
1257          is_broadcast_ether_addr(eh->h_dest))
1258
1259 #define is_arp_allowed_on_bmc(adapter, skb)     \
1260         (is_arp(skb) && is_arp_filt_enabled(adapter))
1261
1262 #define is_broadcast_packet(eh, adapter)        \
1263                 (is_multicast_ether_addr(eh->h_dest) && \
1264                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1265
1266 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1267
1268 #define is_arp_filt_enabled(adapter)    \
1269                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1270
1271 #define is_dhcp_client_filt_enabled(adapter)    \
1272                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1273
1274 #define is_dhcp_srvr_filt_enabled(adapter)      \
1275                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1276
1277 #define is_nbios_filt_enabled(adapter)  \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1279
1280 #define is_ipv6_na_filt_enabled(adapter)        \
1281                 (adapter->bmc_filt_mask &       \
1282                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1283
1284 #define is_ipv6_ra_filt_enabled(adapter)        \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1286
1287 #define is_ipv6_ras_filt_enabled(adapter)       \
1288                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1289
1290 #define is_broadcast_filt_enabled(adapter)      \
1291                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1292
1293 #define is_multicast_filt_enabled(adapter)      \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1295
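/* Decide whether a multicast/broadcast packet must also be delivered to the
 * BMC, based on the adapter's BMC filter mask (ARP, DHCP, NetBIOS, IPv6
 * RA/NA/RAS, plain broadcast/multicast). May rewrite *skb to inline the VLAN
 * tag when the packet is going to the BMC.
 */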
1296 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1297                                struct sk_buff **skb)
1298 {
1299         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1300         bool os2bmc = false;
1301
1302         if (!be_is_os2bmc_enabled(adapter))
1303                 goto done;
1304
1305         if (!is_multicast_ether_addr(eh->h_dest))
1306                 goto done;
1307
1308         if (is_mc_allowed_on_bmc(adapter, eh) ||
1309             is_bc_allowed_on_bmc(adapter, eh) ||
1310             is_arp_allowed_on_bmc(adapter, (*skb))) {
1311                 os2bmc = true;
1312                 goto done;
1313         }
1314
1315         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1316                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1317                 u8 nexthdr = hdr->nexthdr;
1318
1319                 if (nexthdr == IPPROTO_ICMPV6) {
1320                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1321
1322                         switch (icmp6->icmp6_type) {
1323                         case NDISC_ROUTER_ADVERTISEMENT:
1324                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1325                                 goto done;
1326                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1327                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1328                                 goto done;
1329                         default:
1330                                 break;
1331                         }
1332                 }
1333         }
1334
1335         if (is_udp_pkt((*skb))) {
1336                 struct udphdr *udp = udp_hdr((*skb));
1337
1338                 switch (ntohs(udp->dest)) {
1339                 case DHCP_CLIENT_PORT:
1340                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1341                         goto done;
1342                 case DHCP_SERVER_PORT:
1343                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1344                         goto done;
1345                 case NET_BIOS_PORT1:
1346                 case NET_BIOS_PORT2:
1347                         os2bmc = is_nbios_filt_enabled(adapter);
1348                         goto done;
1349                 case DHCPV6_RAS_PORT:
1350                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1351                         goto done;
1352                 default:
1353                         break;
1354                 }
1355         }
1356 done:
1357         /* For packets over a vlan that are destined to the BMC,
1358          * the asic expects the vlan tag to be inline in the packet.
1359          */
1360         if (os2bmc)
1361                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1362
1363         return os2bmc;
1364 }
1365
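/* Main transmit handler: apply HW workarounds, build the WRB params from the
 * skb, enqueue the packet (a second copy is enqueued with the mgmt bit set if
 * it must also reach the BMC), stop the subqueue when full, and ring the TX
 * doorbell when xmit_more indicates no further packets are pending.
 */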
1366 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1367 {
1368         struct be_adapter *adapter = netdev_priv(netdev);
1369         u16 q_idx = skb_get_queue_mapping(skb);
1370         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1371         struct be_wrb_params wrb_params = { 0 };
1372         bool flush = !skb->xmit_more;
1373         u16 wrb_cnt;
1374
1375         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1376         if (unlikely(!skb))
1377                 goto drop;
1378
1379         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1380
1381         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1382         if (unlikely(!wrb_cnt)) {
1383                 dev_kfree_skb_any(skb);
1384                 goto drop;
1385         }
1386
1387         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1388          * enqueue the pkt a 2nd time with the mgmt bit set.
1389          */
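             /* The skb_get() below takes an extra reference on the skb, since
              * the same skb is now owned by two sets of TX WRBs (the wire copy
              * and the BMC copy) and is consumed once per TX completion.
              */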
1390         if (be_send_pkt_to_bmc(adapter, &skb)) {
1391                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1392                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393                 if (unlikely(!wrb_cnt))
1394                         goto drop;
1395                 else
1396                         skb_get(skb);
1397         }
1398
1399         if (be_is_txq_full(txo)) {
1400                 netif_stop_subqueue(netdev, q_idx);
1401                 tx_stats(txo)->tx_stops++;
1402         }
1403
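             /* Ring the TX doorbell only when the stack has no more pkts to
              * send (xmit_more is clear) or when this queue was just stopped.
              */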
1404         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1405                 be_xmit_flush(adapter, txo);
1406
1407         return NETDEV_TX_OK;
1408 drop:
1409         tx_stats(txo)->tx_drv_drops++;
1410         /* Flush the already enqueued tx requests */
1411         if (flush && txo->pend_wrb_cnt)
1412                 be_xmit_flush(adapter, txo);
1413
1414         return NETDEV_TX_OK;
1415 }
1416
1417 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1418 {
1419         struct be_adapter *adapter = netdev_priv(netdev);
1420         struct device *dev = &adapter->pdev->dev;
1421
1422         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1423                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1424                          BE_MIN_MTU, BE_MAX_MTU);
1425                 return -EINVAL;
1426         }
1427
1428         dev_info(dev, "MTU changed from %d to %d bytes\n",
1429                  netdev->mtu, new_mtu);
1430         netdev->mtu = new_mtu;
1431         return 0;
1432 }
1433
1434 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1435 {
1436         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1437                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1438 }
1439
1440 static int be_set_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1446                 return 0;
1447
1448         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1449         if (!status) {
1450                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1451                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1452         } else {
1453                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1454         }
1455         return status;
1456 }
1457
1458 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1459 {
1460         struct device *dev = &adapter->pdev->dev;
1461         int status;
1462
1463         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1464         if (!status) {
1465                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1466                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1467         }
1468         return status;
1469 }
1470
1471 /*
1472  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1473  * If the user configures more, place BE in vlan promiscuous mode.
1474  */
1475 static int be_vid_config(struct be_adapter *adapter)
1476 {
1477         struct device *dev = &adapter->pdev->dev;
1478         u16 vids[BE_NUM_VLANS_SUPPORTED];
1479         u16 num = 0, i = 0;
1480         int status = 0;
1481
1482         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1483         if (adapter->netdev->flags & IFF_PROMISC)
1484                 return 0;
1485
1486         if (adapter->vlans_added > be_max_vlans(adapter))
1487                 return be_set_vlan_promisc(adapter);
1488
1489         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1490                 status = be_clear_vlan_promisc(adapter);
1491                 if (status)
1492                         return status;
1493         }
1494         /* Construct VLAN Table to give to HW */
1495         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1496                 vids[num++] = cpu_to_le16(i);
1497
1498         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1499         if (status) {
1500                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1501                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1502                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1503                     addl_status(status) ==
1504                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1505                         return be_set_vlan_promisc(adapter);
1506         }
1507         return status;
1508 }
1509
1510 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1511 {
1512         struct be_adapter *adapter = netdev_priv(netdev);
1513         int status = 0;
1514
1515         mutex_lock(&adapter->rx_filter_lock);
1516
1517         /* Packets with VID 0 are always received by Lancer by default */
1518         if (lancer_chip(adapter) && vid == 0)
1519                 goto done;
1520
1521         if (test_bit(vid, adapter->vids))
1522                 goto done;
1523
1524         set_bit(vid, adapter->vids);
1525         adapter->vlans_added++;
1526
1527         status = be_vid_config(adapter);
1528 done:
1529         mutex_unlock(&adapter->rx_filter_lock);
1530         return status;
1531 }
1532
1533 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1534 {
1535         struct be_adapter *adapter = netdev_priv(netdev);
1536         int status = 0;
1537
1538         mutex_lock(&adapter->rx_filter_lock);
1539
1540         /* Packets with VID 0 are always received by Lancer by default */
1541         if (lancer_chip(adapter) && vid == 0)
1542                 goto done;
1543
1544         if (!test_bit(vid, adapter->vids))
1545                 goto done;
1546
1547         clear_bit(vid, adapter->vids);
1548         adapter->vlans_added--;
1549
1550         status = be_vid_config(adapter);
1551 done:
1552         mutex_unlock(&adapter->rx_filter_lock);
1553         return status;
1554 }
1555
1556 static void be_set_all_promisc(struct be_adapter *adapter)
1557 {
1558         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1559         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1560 }
1561
1562 static void be_set_mc_promisc(struct be_adapter *adapter)
1563 {
1564         int status;
1565
1566         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1567                 return;
1568
1569         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1570         if (!status)
1571                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1572 }
1573
1574 static void be_set_uc_promisc(struct be_adapter *adapter)
1575 {
1576         int status;
1577
1578         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1579                 return;
1580
1581         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1582         if (!status)
1583                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1584 }
1585
1586 static void be_clear_uc_promisc(struct be_adapter *adapter)
1587 {
1588         int status;
1589
1590         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1591                 return;
1592
1593         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1594         if (!status)
1595                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1596 }
1597
1598 /* The below 2 functions are used as the sync/unsync callbacks for
1599  * __dev_mc_sync()/__dev_uc_sync(). A single callback serves both cases; it
1600  * doesn't actually add/remove addresses, but only detects changes to the
1601  * uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1602  */
1603 static int be_uc_list_update(struct net_device *netdev,
1604                              const unsigned char *addr)
1605 {
1606         struct be_adapter *adapter = netdev_priv(netdev);
1607
1608         adapter->update_uc_list = true;
1609         return 0;
1610 }
1611
1612 static int be_mc_list_update(struct net_device *netdev,
1613                              const unsigned char *addr)
1614 {
1615         struct be_adapter *adapter = netdev_priv(netdev);
1616
1617         adapter->update_mc_list = true;
1618         return 0;
1619 }
1620
1621 static void be_set_mc_list(struct be_adapter *adapter)
1622 {
1623         struct net_device *netdev = adapter->netdev;
1624         struct netdev_hw_addr *ha;
1625         bool mc_promisc = false;
1626         int status;
1627
1628         netif_addr_lock_bh(netdev);
1629         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1630
1631         if (netdev->flags & IFF_PROMISC) {
1632                 adapter->update_mc_list = false;
1633         } else if (netdev->flags & IFF_ALLMULTI ||
1634                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1635                 /* Enable multicast promisc if num configured exceeds
1636                  * what we support
1637                  */
1638                 mc_promisc = true;
1639                 adapter->update_mc_list = false;
1640         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1641                 /* Update mc-list unconditionally if the iface was previously
1642                  * in mc-promisc mode and now is out of that mode.
1643                  */
1644                 adapter->update_mc_list = true;
1645         }
1646
1647         if (adapter->update_mc_list) {
1648                 int i = 0;
1649
1650                 /* cache the mc-list in adapter */
1651                 netdev_for_each_mc_addr(ha, netdev) {
1652                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1653                         i++;
1654                 }
1655                 adapter->mc_count = netdev_mc_count(netdev);
1656         }
1657         netif_addr_unlock_bh(netdev);
1658
1659         if (mc_promisc) {
1660                 be_set_mc_promisc(adapter);
1661         } else if (adapter->update_mc_list) {
1662                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1663                 if (!status)
1664                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1665                 else
1666                         be_set_mc_promisc(adapter);
1667
1668                 adapter->update_mc_list = false;
1669         }
1670 }
1671
1672 static void be_clear_mc_list(struct be_adapter *adapter)
1673 {
1674         struct net_device *netdev = adapter->netdev;
1675
1676         __dev_mc_unsync(netdev, NULL);
1677         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1678         adapter->mc_count = 0;
1679 }
1680
1681 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1682 {
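             /* uc_list[i] maps to pmac_id[i + 1]; pmac_id[0] always holds the
              * primary MAC, so reuse it when a uc-list entry matches dev_mac.
              */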
1683         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1684                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1685                 return 0;
1686         }
1687
1688         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1689                                adapter->if_handle,
1690                                &adapter->pmac_id[uc_idx + 1], 0);
1691 }
1692
1693 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1694 {
1695         if (pmac_id == adapter->pmac_id[0])
1696                 return;
1697
1698         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1699 }
1700
1701 static void be_set_uc_list(struct be_adapter *adapter)
1702 {
1703         struct net_device *netdev = adapter->netdev;
1704         struct netdev_hw_addr *ha;
1705         bool uc_promisc = false;
1706         int curr_uc_macs = 0, i;
1707
1708         netif_addr_lock_bh(netdev);
1709         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1710
1711         if (netdev->flags & IFF_PROMISC) {
1712                 adapter->update_uc_list = false;
1713         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1714                 uc_promisc = true;
1715                 adapter->update_uc_list = false;
1716         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1717                 /* Update uc-list unconditionally if the iface was previously
1718                  * in uc-promisc mode and now is out of that mode.
1719                  */
1720                 adapter->update_uc_list = true;
1721         }
1722
1723         if (adapter->update_uc_list) {
1724                 /* cache the uc-list in adapter array */
1725                 i = 0;
1726                 netdev_for_each_uc_addr(ha, netdev) {
1727                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1728                         i++;
1729                 }
1730                 curr_uc_macs = netdev_uc_count(netdev);
1731         }
1732         netif_addr_unlock_bh(netdev);
1733
1734         if (uc_promisc) {
1735                 be_set_uc_promisc(adapter);
1736         } else if (adapter->update_uc_list) {
1737                 be_clear_uc_promisc(adapter);
1738
1739                 for (i = 0; i < adapter->uc_macs; i++)
1740                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1741
1742                 for (i = 0; i < curr_uc_macs; i++)
1743                         be_uc_mac_add(adapter, i);
1744                 adapter->uc_macs = curr_uc_macs;
1745                 adapter->update_uc_list = false;
1746         }
1747 }
1748
1749 static void be_clear_uc_list(struct be_adapter *adapter)
1750 {
1751         struct net_device *netdev = adapter->netdev;
1752         int i;
1753
1754         __dev_uc_unsync(netdev, NULL);
1755         for (i = 0; i < adapter->uc_macs; i++)
1756                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1757
1758         adapter->uc_macs = 0;
1759 }
1760
1761 static void __be_set_rx_mode(struct be_adapter *adapter)
1762 {
1763         struct net_device *netdev = adapter->netdev;
1764
1765         mutex_lock(&adapter->rx_filter_lock);
1766
1767         if (netdev->flags & IFF_PROMISC) {
1768                 if (!be_in_all_promisc(adapter))
1769                         be_set_all_promisc(adapter);
1770         } else if (be_in_all_promisc(adapter)) {
1771                 /* We need to re-program the vlan-list or clear
1772                  * vlan-promisc mode (if needed) when the interface
1773                  * comes out of promisc mode.
1774                  */
1775                 be_vid_config(adapter);
1776         }
1777
1778         be_set_uc_list(adapter);
1779         be_set_mc_list(adapter);
1780
1781         mutex_unlock(&adapter->rx_filter_lock);
1782 }
1783
1784 static void be_work_set_rx_mode(struct work_struct *work)
1785 {
1786         struct be_cmd_work *cmd_work =
1787                                 container_of(work, struct be_cmd_work, work);
1788
1789         __be_set_rx_mode(cmd_work->adapter);
1790         kfree(cmd_work);
1791 }
1792
1793 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1794 {
1795         struct be_adapter *adapter = netdev_priv(netdev);
1796         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1797         int status;
1798
1799         if (!sriov_enabled(adapter))
1800                 return -EPERM;
1801
1802         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1803                 return -EINVAL;
1804
1805         /* Proceed further only if the user-provided MAC is different
1806          * from the active MAC
1807          */
1808         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1809                 return 0;
1810
1811         if (BEx_chip(adapter)) {
1812                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1813                                 vf + 1);
1814
1815                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1816                                          &vf_cfg->pmac_id, vf + 1);
1817         } else {
1818                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1819                                         vf + 1);
1820         }
1821
1822         if (status) {
1823                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1824                         mac, vf, status);
1825                 return be_cmd_status(status);
1826         }
1827
1828         ether_addr_copy(vf_cfg->mac_addr, mac);
1829
1830         return 0;
1831 }
1832
1833 static int be_get_vf_config(struct net_device *netdev, int vf,
1834                             struct ifla_vf_info *vi)
1835 {
1836         struct be_adapter *adapter = netdev_priv(netdev);
1837         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1838
1839         if (!sriov_enabled(adapter))
1840                 return -EPERM;
1841
1842         if (vf >= adapter->num_vfs)
1843                 return -EINVAL;
1844
1845         vi->vf = vf;
1846         vi->max_tx_rate = vf_cfg->tx_rate;
1847         vi->min_tx_rate = 0;
1848         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1849         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1850         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1851         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1852         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1853
1854         return 0;
1855 }
1856
1857 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1858 {
1859         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860         u16 vids[BE_NUM_VLANS_SUPPORTED];
1861         int vf_if_id = vf_cfg->if_handle;
1862         int status;
1863
1864         /* Enable Transparent VLAN Tagging */
1865         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1866         if (status)
1867                 return status;
1868
1869         /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1870         vids[0] = 0;
1871         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1872         if (!status)
1873                 dev_info(&adapter->pdev->dev,
1874                          "Cleared guest VLANs on VF%d", vf);
1875
1876         /* After TVT is enabled, disallow VFs to program VLAN filters */
1877         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1878                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1879                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1880                 if (!status)
1881                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1882         }
1883         return 0;
1884 }
1885
1886 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1887 {
1888         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1889         struct device *dev = &adapter->pdev->dev;
1890         int status;
1891
1892         /* Reset Transparent VLAN Tagging. */
1893         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1894                                        vf_cfg->if_handle, 0, 0);
1895         if (status)
1896                 return status;
1897
1898         /* Allow VFs to program VLAN filtering */
1899         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1900                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1901                                                   BE_PRIV_FILTMGMT, vf + 1);
1902                 if (!status) {
1903                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1904                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1905                 }
1906         }
1907
1908         dev_info(dev,
1909                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1910         return 0;
1911 }
1912
1913 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1914                           __be16 vlan_proto)
1915 {
1916         struct be_adapter *adapter = netdev_priv(netdev);
1917         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1918         int status;
1919
1920         if (!sriov_enabled(adapter))
1921                 return -EPERM;
1922
1923         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1924                 return -EINVAL;
1925
1926         if (vlan_proto != htons(ETH_P_8021Q))
1927                 return -EPROTONOSUPPORT;
1928
1929         if (vlan || qos) {
1930                 vlan |= qos << VLAN_PRIO_SHIFT;
1931                 status = be_set_vf_tvt(adapter, vf, vlan);
1932         } else {
1933                 status = be_clear_vf_tvt(adapter, vf);
1934         }
1935
1936         if (status) {
1937                 dev_err(&adapter->pdev->dev,
1938                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1939                         status);
1940                 return be_cmd_status(status);
1941         }
1942
1943         vf_cfg->vlan_tag = vlan;
1944         return 0;
1945 }
1946
1947 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1948                              int min_tx_rate, int max_tx_rate)
1949 {
1950         struct be_adapter *adapter = netdev_priv(netdev);
1951         struct device *dev = &adapter->pdev->dev;
1952         int percent_rate, status = 0;
1953         u16 link_speed = 0;
1954         u8 link_status;
1955
1956         if (!sriov_enabled(adapter))
1957                 return -EPERM;
1958
1959         if (vf >= adapter->num_vfs)
1960                 return -EINVAL;
1961
1962         if (min_tx_rate)
1963                 return -EINVAL;
1964
1965         if (!max_tx_rate)
1966                 goto config_qos;
1967
1968         status = be_cmd_link_status_query(adapter, &link_speed,
1969                                           &link_status, 0);
1970         if (status)
1971                 goto err;
1972
1973         if (!link_status) {
1974                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1975                 status = -ENETDOWN;
1976                 goto err;
1977         }
1978
1979         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1980                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1981                         link_speed);
1982                 status = -EINVAL;
1983                 goto err;
1984         }
1985
1986         /* On Skyhawk the QOS setting must be done only as a % value */
1987         percent_rate = link_speed / 100;
1988         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1989                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1990                         percent_rate);
1991                 status = -EINVAL;
1992                 goto err;
1993         }
1994
1995 config_qos:
1996         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1997         if (status)
1998                 goto err;
1999
2000         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2001         return 0;
2002
2003 err:
2004         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2005                 max_tx_rate, vf);
2006         return be_cmd_status(status);
2007 }
2008
2009 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2010                                 int link_state)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         int status;
2014
2015         if (!sriov_enabled(adapter))
2016                 return -EPERM;
2017
2018         if (vf >= adapter->num_vfs)
2019                 return -EINVAL;
2020
2021         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2022         if (status) {
2023                 dev_err(&adapter->pdev->dev,
2024                         "Link state change on VF %d failed: %#x\n", vf, status);
2025                 return be_cmd_status(status);
2026         }
2027
2028         adapter->vf_cfg[vf].plink_tracking = link_state;
2029
2030         return 0;
2031 }
2032
2033 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2034 {
2035         struct be_adapter *adapter = netdev_priv(netdev);
2036         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2037         u8 spoofchk;
2038         int status;
2039
2040         if (!sriov_enabled(adapter))
2041                 return -EPERM;
2042
2043         if (vf >= adapter->num_vfs)
2044                 return -EINVAL;
2045
2046         if (BEx_chip(adapter))
2047                 return -EOPNOTSUPP;
2048
2049         if (enable == vf_cfg->spoofchk)
2050                 return 0;
2051
2052         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2053
2054         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2055                                        0, spoofchk);
2056         if (status) {
2057                 dev_err(&adapter->pdev->dev,
2058                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2059                 return be_cmd_status(status);
2060         }
2061
2062         vf_cfg->spoofchk = enable;
2063         return 0;
2064 }
2065
2066 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2067                           ulong now)
2068 {
2069         aic->rx_pkts_prev = rx_pkts;
2070         aic->tx_reqs_prev = tx_pkts;
2071         aic->jiffies = now;
2072 }
2073
2074 static int be_get_new_eqd(struct be_eq_obj *eqo)
2075 {
2076         struct be_adapter *adapter = eqo->adapter;
2077         int eqd, start;
2078         struct be_aic_obj *aic;
2079         struct be_rx_obj *rxo;
2080         struct be_tx_obj *txo;
2081         u64 rx_pkts = 0, tx_pkts = 0;
2082         ulong now;
2083         u32 pps, delta;
2084         int i;
2085
2086         aic = &adapter->aic_obj[eqo->idx];
2087         if (!aic->enable) {
2088                 if (aic->jiffies)
2089                         aic->jiffies = 0;
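                     /* Adaptive coalescing is off; use the statically
                      * configured delay (et_eqd).
                      */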
2090                 eqd = aic->et_eqd;
2091                 return eqd;
2092         }
2093
2094         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2095                 do {
2096                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2097                         rx_pkts += rxo->stats.rx_pkts;
2098                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2099         }
2100
2101         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2102                 do {
2103                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2104                         tx_pkts += txo->stats.tx_reqs;
2105                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2106         }
2107
2108         /* Skip if we wrapped around or this is the first calculation */
2109         now = jiffies;
2110         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2111             rx_pkts < aic->rx_pkts_prev ||
2112             tx_pkts < aic->tx_reqs_prev) {
2113                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2114                 return aic->prev_eqd;
2115         }
2116
2117         delta = jiffies_to_msecs(now - aic->jiffies);
2118         if (delta == 0)
2119                 return aic->prev_eqd;
2120
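             /* Aggregate RX + TX pkt rate in pkts/sec over the sampled
              * interval; every 15K pps adds 4 to eqd (e.g. ~150K pps yields
              * eqd = 40) before clamping to [min_eqd, max_eqd] below.
              */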
2121         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2122                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2123         eqd = (pps / 15000) << 2;
2124
2125         if (eqd < 8)
2126                 eqd = 0;
2127         eqd = min_t(u32, eqd, aic->max_eqd);
2128         eqd = max_t(u32, eqd, aic->min_eqd);
2129
2130         be_aic_update(aic, rx_pkts, tx_pkts, now);
2131
2132         return eqd;
2133 }
2134
2135 /* For Skyhawk-R only */
2136 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2140         ulong now = jiffies;
2141         int eqd;
2142         u32 mult_enc;
2143
2144         if (!aic->enable)
2145                 return 0;
2146
2147         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2148                 eqd = aic->prev_eqd;
2149         else
2150                 eqd = be_get_new_eqd(eqo);
2151
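             /* Map the computed delay into one of the R2I delay multiplier
              * encodings (R2I_DLY_ENC_*) used by Skyhawk-R.
              */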
2152         if (eqd > 100)
2153                 mult_enc = R2I_DLY_ENC_1;
2154         else if (eqd > 60)
2155                 mult_enc = R2I_DLY_ENC_2;
2156         else if (eqd > 20)
2157                 mult_enc = R2I_DLY_ENC_3;
2158         else
2159                 mult_enc = R2I_DLY_ENC_0;
2160
2161         aic->prev_eqd = eqd;
2162
2163         return mult_enc;
2164 }
2165
2166 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2167 {
2168         struct be_set_eqd set_eqd[MAX_EVT_QS];
2169         struct be_aic_obj *aic;
2170         struct be_eq_obj *eqo;
2171         int i, num = 0, eqd;
2172
2173         for_all_evt_queues(adapter, eqo, i) {
2174                 aic = &adapter->aic_obj[eqo->idx];
2175                 eqd = be_get_new_eqd(eqo);
2176                 if (force_update || eqd != aic->prev_eqd) {
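                             /* Convert the delay into the multiplier
                              * format passed to be_cmd_modify_eqd()
                              */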
2177                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2178                         set_eqd[num].eq_id = eqo->q.id;
2179                         aic->prev_eqd = eqd;
2180                         num++;
2181                 }
2182         }
2183
2184         if (num)
2185                 be_cmd_modify_eqd(adapter, set_eqd, num);
2186 }
2187
2188 static void be_rx_stats_update(struct be_rx_obj *rxo,
2189                                struct be_rx_compl_info *rxcp)
2190 {
2191         struct be_rx_stats *stats = rx_stats(rxo);
2192
2193         u64_stats_update_begin(&stats->sync);
2194         stats->rx_compl++;
2195         stats->rx_bytes += rxcp->pkt_size;
2196         stats->rx_pkts++;
2197         if (rxcp->tunneled)
2198                 stats->rx_vxlan_offload_pkts++;
2199         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2200                 stats->rx_mcast_pkts++;
2201         if (rxcp->err)
2202                 stats->rx_compl_err++;
2203         u64_stats_update_end(&stats->sync);
2204 }
2205
2206 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2207 {
2208         /* L4 checksum is not reliable for non-TCP/UDP packets.
2209          * Also ignore ipcksm for IPv6 pkts.
2210          */
2211         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2212                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2213 }
2214
2215 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2216 {
2217         struct be_adapter *adapter = rxo->adapter;
2218         struct be_rx_page_info *rx_page_info;
2219         struct be_queue_info *rxq = &rxo->q;
2220         u32 frag_idx = rxq->tail;
2221
2222         rx_page_info = &rxo->page_info_tbl[frag_idx];
2223         BUG_ON(!rx_page_info->page);
2224
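             /* The whole big page was mapped as one DMA buffer: unmap it only
              * when its last frag is consumed; otherwise just sync this frag's
              * slice for the CPU.
              */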
2225         if (rx_page_info->last_frag) {
2226                 dma_unmap_page(&adapter->pdev->dev,
2227                                dma_unmap_addr(rx_page_info, bus),
2228                                adapter->big_page_size, DMA_FROM_DEVICE);
2229                 rx_page_info->last_frag = false;
2230         } else {
2231                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2232                                         dma_unmap_addr(rx_page_info, bus),
2233                                         rx_frag_size, DMA_FROM_DEVICE);
2234         }
2235
2236         queue_tail_inc(rxq);
2237         atomic_dec(&rxq->used);
2238         return rx_page_info;
2239 }
2240
2241 /* Throw away the data in the Rx completion */
2242 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2243                                 struct be_rx_compl_info *rxcp)
2244 {
2245         struct be_rx_page_info *page_info;
2246         u16 i, num_rcvd = rxcp->num_rcvd;
2247
2248         for (i = 0; i < num_rcvd; i++) {
2249                 page_info = get_rx_page_info(rxo);
2250                 put_page(page_info->page);
2251                 memset(page_info, 0, sizeof(*page_info));
2252         }
2253 }
2254
2255 /*
2256  * skb_fill_rx_data forms a complete skb for an ether frame
2257  * indicated by rxcp.
2258  */
2259 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2260                              struct be_rx_compl_info *rxcp)
2261 {
2262         struct be_rx_page_info *page_info;
2263         u16 i, j;
2264         u16 hdr_len, curr_frag_len, remaining;
2265         u8 *start;
2266
2267         page_info = get_rx_page_info(rxo);
2268         start = page_address(page_info->page) + page_info->page_offset;
2269         prefetch(start);
2270
2271         /* Copy data in the first descriptor of this completion */
2272         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2273
2274         skb->len = curr_frag_len;
2275         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2276                 memcpy(skb->data, start, curr_frag_len);
2277                 /* Complete packet has now been moved to data */
2278                 put_page(page_info->page);
2279                 skb->data_len = 0;
2280                 skb->tail += curr_frag_len;
2281         } else {
2282                 hdr_len = ETH_HLEN;
2283                 memcpy(skb->data, start, hdr_len);
2284                 skb_shinfo(skb)->nr_frags = 1;
2285                 skb_frag_set_page(skb, 0, page_info->page);
2286                 skb_shinfo(skb)->frags[0].page_offset =
2287                                         page_info->page_offset + hdr_len;
2288                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2289                                   curr_frag_len - hdr_len);
2290                 skb->data_len = curr_frag_len - hdr_len;
2291                 skb->truesize += rx_frag_size;
2292                 skb->tail += hdr_len;
2293         }
2294         page_info->page = NULL;
2295
2296         if (rxcp->pkt_size <= rx_frag_size) {
2297                 BUG_ON(rxcp->num_rcvd != 1);
2298                 return;
2299         }
2300
2301         /* More frags present for this completion */
2302         remaining = rxcp->pkt_size - curr_frag_len;
2303         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2304                 page_info = get_rx_page_info(rxo);
2305                 curr_frag_len = min(remaining, rx_frag_size);
2306
2307                 /* Coalesce all frags from the same physical page in one slot */
2308                 if (page_info->page_offset == 0) {
2309                         /* Fresh page */
2310                         j++;
2311                         skb_frag_set_page(skb, j, page_info->page);
2312                         skb_shinfo(skb)->frags[j].page_offset =
2313                                                         page_info->page_offset;
2314                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2315                         skb_shinfo(skb)->nr_frags++;
2316                 } else {
2317                         put_page(page_info->page);
2318                 }
2319
2320                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2321                 skb->len += curr_frag_len;
2322                 skb->data_len += curr_frag_len;
2323                 skb->truesize += rx_frag_size;
2324                 remaining -= curr_frag_len;
2325                 page_info->page = NULL;
2326         }
2327         BUG_ON(j > MAX_SKB_FRAGS);
2328 }
2329
2330 /* Process the RX completion indicated by rxcp when GRO is disabled */
2331 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2332                                 struct be_rx_compl_info *rxcp)
2333 {
2334         struct be_adapter *adapter = rxo->adapter;
2335         struct net_device *netdev = adapter->netdev;
2336         struct sk_buff *skb;
2337
2338         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2339         if (unlikely(!skb)) {
2340                 rx_stats(rxo)->rx_drops_no_skbs++;
2341                 be_rx_compl_discard(rxo, rxcp);
2342                 return;
2343         }
2344
2345         skb_fill_rx_data(rxo, skb, rxcp);
2346
2347         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2348                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2349         else
2350                 skb_checksum_none_assert(skb);
2351
2352         skb->protocol = eth_type_trans(skb, netdev);
2353         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2354         if (netdev->features & NETIF_F_RXHASH)
2355                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2356
2357         skb->csum_level = rxcp->tunneled;
2358         skb_mark_napi_id(skb, napi);
2359
2360         if (rxcp->vlanf)
2361                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2362
2363         netif_receive_skb(skb);
2364 }
2365
2366 /* Process the RX completion indicated by rxcp when GRO is enabled */
2367 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2368                                     struct napi_struct *napi,
2369                                     struct be_rx_compl_info *rxcp)
2370 {
2371         struct be_adapter *adapter = rxo->adapter;
2372         struct be_rx_page_info *page_info;
2373         struct sk_buff *skb = NULL;
2374         u16 remaining, curr_frag_len;
2375         u16 i, j;
2376
2377         skb = napi_get_frags(napi);
2378         if (!skb) {
2379                 be_rx_compl_discard(rxo, rxcp);
2380                 return;
2381         }
2382
2383         remaining = rxcp->pkt_size;
2384         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2385                 page_info = get_rx_page_info(rxo);
2386
2387                 curr_frag_len = min(remaining, rx_frag_size);
2388
2389                 /* Coalesce all frags from the same physical page in one slot */
2390                 if (i == 0 || page_info->page_offset == 0) {
2391                         /* First frag or Fresh page */
2392                         j++;
2393                         skb_frag_set_page(skb, j, page_info->page);
2394                         skb_shinfo(skb)->frags[j].page_offset =
2395                                                         page_info->page_offset;
2396                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2397                 } else {
2398                         put_page(page_info->page);
2399                 }
2400                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2401                 skb->truesize += rx_frag_size;
2402                 remaining -= curr_frag_len;
2403                 memset(page_info, 0, sizeof(*page_info));
2404         }
2405         BUG_ON(j > MAX_SKB_FRAGS);
2406
2407         skb_shinfo(skb)->nr_frags = j + 1;
2408         skb->len = rxcp->pkt_size;
2409         skb->data_len = rxcp->pkt_size;
2410         skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2412         if (adapter->netdev->features & NETIF_F_RXHASH)
2413                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2414
2415         skb->csum_level = rxcp->tunneled;
2416
2417         if (rxcp->vlanf)
2418                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2419
2420         napi_gro_frags(napi);
2421 }
2422
2423 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2424                                  struct be_rx_compl_info *rxcp)
2425 {
2426         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2427         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2428         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2429         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2430         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2431         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2432         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2433         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2434         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2435         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2436         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2437         if (rxcp->vlanf) {
2438                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2439                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2440         }
2441         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2442         rxcp->tunneled =
2443                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2444 }
2445
2446 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2447                                  struct be_rx_compl_info *rxcp)
2448 {
2449         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2450         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2451         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2452         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2453         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2454         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2455         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2456         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2457         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2458         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2459         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2460         if (rxcp->vlanf) {
2461                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2462                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2463         }
2464         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2465         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2466 }
2467
2468 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2469 {
2470         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2471         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2472         struct be_adapter *adapter = rxo->adapter;
2473
2474         /* For checking the valid bit it is OK to use either definition, as the
2475          * valid bit is at the same position in both v0 and v1 Rx compls */
2476         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2477                 return NULL;
2478
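             /* Ensure load ordering of valid bit dword and other dwords below */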
2479         rmb();
2480         be_dws_le_to_cpu(compl, sizeof(*compl));
2481
2482         if (adapter->be3_native)
2483                 be_parse_rx_compl_v1(compl, rxcp);
2484         else
2485                 be_parse_rx_compl_v0(compl, rxcp);
2486
2487         if (rxcp->ip_frag)
2488                 rxcp->l4_csum = 0;
2489
2490         if (rxcp->vlanf) {
2491                 /* In QNQ modes, if qnq bit is not set, then the packet was
2492                  * tagged only with the transparent outer vlan-tag and must
2493                  * not be treated as a vlan packet by host
2494                  */
2495                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2496                         rxcp->vlanf = 0;
2497
2498                 if (!lancer_chip(adapter))
2499                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2500
2501                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2502                     !test_bit(rxcp->vlan_tag, adapter->vids))
2503                         rxcp->vlanf = 0;
2504         }
2505
2506         /* As the compl has been parsed, reset it; we won't touch it again */
2507         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2508
2509         queue_tail_inc(&rxo->cq);
2510         return rxcp;
2511 }
2512
2513 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2514 {
2515         u32 order = get_order(size);
2516
2517         if (order > 0)
2518                 gfp |= __GFP_COMP;
2519         return  alloc_pages(gfp, order);
2520 }
2521
2522 /*
2523  * Allocate a page, split it into fragments of size rx_frag_size and post
2524  * them as receive buffers to BE
2525  */
2526 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2527 {
2528         struct be_adapter *adapter = rxo->adapter;
2529         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2530         struct be_queue_info *rxq = &rxo->q;
2531         struct page *pagep = NULL;
2532         struct device *dev = &adapter->pdev->dev;
2533         struct be_eth_rx_d *rxd;
2534         u64 page_dmaaddr = 0, frag_dmaaddr;
2535         u32 posted, page_offset = 0, notify = 0;
2536
2537         page_info = &rxo->page_info_tbl[rxq->head];
2538         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2539                 if (!pagep) {
2540                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2541                         if (unlikely(!pagep)) {
2542                                 rx_stats(rxo)->rx_post_fail++;
2543                                 break;
2544                         }
2545                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2546                                                     adapter->big_page_size,
2547                                                     DMA_FROM_DEVICE);
2548                         if (dma_mapping_error(dev, page_dmaaddr)) {
2549                                 put_page(pagep);
2550                                 pagep = NULL;
2551                                 adapter->drv_stats.dma_map_errors++;
2552                                 break;
2553                         }
2554                         page_offset = 0;
2555                 } else {
2556                         get_page(pagep);
2557                         page_offset += rx_frag_size;
2558                 }
2559                 page_info->page_offset = page_offset;
2560                 page_info->page = pagep;
2561
2562                 rxd = queue_head_node(rxq);
2563                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2564                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2565                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2566
2567                 /* Any space left in the current big page for another frag? */
2568                 if ((page_offset + rx_frag_size + rx_frag_size) >
2569                                         adapter->big_page_size) {
2570                         pagep = NULL;
2571                         page_info->last_frag = true;
2572                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2573                 } else {
2574                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2575                 }
2576
2577                 prev_page_info = page_info;
2578                 queue_head_inc(rxq);
2579                 page_info = &rxo->page_info_tbl[rxq->head];
2580         }
2581
2582         /* Mark the last frag of a page when we break out of the above loop
2583          * with no more slots available in the RXQ
2584          */
2585         if (pagep) {
2586                 prev_page_info->last_frag = true;
2587                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2588         }
2589
2590         if (posted) {
2591                 atomic_add(posted, &rxq->used);
2592                 if (rxo->rx_post_starved)
2593                         rxo->rx_post_starved = false;
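                     /* Notify the HW of newly posted frags in chunks of at
                      * most MAX_NUM_POST_ERX_DB per doorbell write.
                      */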
2594                 do {
2595                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2596                         be_rxq_notify(adapter, rxq->id, notify);
2597                         posted -= notify;
2598                 } while (posted);
2599         } else if (atomic_read(&rxq->used) == 0) {
2600                 /* Let be_worker replenish when memory is available */
2601                 rxo->rx_post_starved = true;
2602         }
2603 }
2604
2605 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2606 {
2607         struct be_queue_info *tx_cq = &txo->cq;
2608         struct be_tx_compl_info *txcp = &txo->txcp;
2609         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2610
2611         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2612                 return NULL;
2613
2614         /* Ensure load ordering of valid bit dword and other dwords below */
2615         rmb();
2616         be_dws_le_to_cpu(compl, sizeof(*compl));
2617
2618         txcp->status = GET_TX_COMPL_BITS(status, compl);
2619         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2620
2621         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2622         queue_tail_inc(tx_cq);
2623         return txcp;
2624 }
2625
2626 static u16 be_tx_compl_process(struct be_adapter *adapter,
2627                                struct be_tx_obj *txo, u16 last_index)
2628 {
2629         struct sk_buff **sent_skbs = txo->sent_skb_list;
2630         struct be_queue_info *txq = &txo->q;
2631         struct sk_buff *skb = NULL;
2632         bool unmap_skb_hdr = false;
2633         struct be_eth_wrb *wrb;
2634         u16 num_wrbs = 0;
2635         u32 frag_index;
2636
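             /* Walk the TX ring from the hdr WRB (where the skb pointer was
              * stashed at xmit time) through all frag WRBs until last_index,
              * unmapping each one.
              */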
2637         do {
2638                 if (sent_skbs[txq->tail]) {
2639                         /* Free skb from prev req */
2640                         if (skb)
2641                                 dev_consume_skb_any(skb);
2642                         skb = sent_skbs[txq->tail];
2643                         sent_skbs[txq->tail] = NULL;
2644                         queue_tail_inc(txq);  /* skip hdr wrb */
2645                         num_wrbs++;
2646                         unmap_skb_hdr = true;
2647                 }
2648                 wrb = queue_tail_node(txq);
2649                 frag_index = txq->tail;
2650                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2651                               (unmap_skb_hdr && skb_headlen(skb)));
2652                 unmap_skb_hdr = false;
2653                 queue_tail_inc(txq);
2654                 num_wrbs++;
2655         } while (frag_index != last_index);
2656         dev_consume_skb_any(skb);
2657
2658         return num_wrbs;
2659 }
2660
2661 /* Return the number of events in the event queue */
2662 static inline int events_get(struct be_eq_obj *eqo)
2663 {
2664         struct be_eq_entry *eqe;
2665         int num = 0;
2666
2667         do {
2668                 eqe = queue_tail_node(&eqo->q);
2669                 if (eqe->evt == 0)
2670                         break;
2671
2672                 rmb();
2673                 eqe->evt = 0;
2674                 num++;
2675                 queue_tail_inc(&eqo->q);
2676         } while (true);
2677
2678         return num;
2679 }
2680
2681 /* Leaves the EQ in disarmed state */
2682 static void be_eq_clean(struct be_eq_obj *eqo)
2683 {
2684         int num = events_get(eqo);
2685
2686         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2687 }
2688
2689 /* Free posted rx buffers that were not used */
2690 static void be_rxq_clean(struct be_rx_obj *rxo)
2691 {
2692         struct be_queue_info *rxq = &rxo->q;
2693         struct be_rx_page_info *page_info;
2694
2695         while (atomic_read(&rxq->used) > 0) {
2696                 page_info = get_rx_page_info(rxo);
2697                 put_page(page_info->page);
2698                 memset(page_info, 0, sizeof(*page_info));
2699         }
2700         BUG_ON(atomic_read(&rxq->used));
2701         rxq->tail = 0;
2702         rxq->head = 0;
2703 }
2704
2705 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2706 {
2707         struct be_queue_info *rx_cq = &rxo->cq;
2708         struct be_rx_compl_info *rxcp;
2709         struct be_adapter *adapter = rxo->adapter;
2710         int flush_wait = 0;
2711
2712         /* Consume pending rx completions.
2713          * Wait for the flush completion (identified by zero num_rcvd)
2714          * to arrive. Notify CQ even when there are no more CQ entries
2715          * for HW to flush partially coalesced CQ entries.
2716          * In Lancer, there is no need to wait for flush compl.
2717          */
2718         for (;;) {
2719                 rxcp = be_rx_compl_get(rxo);
2720                 if (!rxcp) {
2721                         if (lancer_chip(adapter))
2722                                 break;
2723
2724                         if (flush_wait++ > 50 ||
2725                             be_check_error(adapter,
2726                                            BE_ERROR_HW)) {
2727                                 dev_warn(&adapter->pdev->dev,
2728                                          "did not receive flush compl\n");
2729                                 break;
2730                         }
2731                         be_cq_notify(adapter, rx_cq->id, true, 0);
2732                         mdelay(1);
2733                 } else {
2734                         be_rx_compl_discard(rxo, rxcp);
2735                         be_cq_notify(adapter, rx_cq->id, false, 1);
2736                         if (rxcp->num_rcvd == 0)
2737                                 break;
2738                 }
2739         }
2740
2741         /* After cleanup, leave the CQ in unarmed state */
2742         be_cq_notify(adapter, rx_cq->id, false, 0);
2743 }
2744
2745 static void be_tx_compl_clean(struct be_adapter *adapter)
2746 {
2747         struct device *dev = &adapter->pdev->dev;
2748         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2749         struct be_tx_compl_info *txcp;
2750         struct be_queue_info *txq;
2751         u32 end_idx, notified_idx;
2752         struct be_tx_obj *txo;
2753         int i, pending_txqs;
2754
2755         /* Stop polling for compls when HW has been silent for 10ms */
2756         do {
2757                 pending_txqs = adapter->num_tx_qs;
2758
2759                 for_all_tx_queues(adapter, txo, i) {
2760                         cmpl = 0;
2761                         num_wrbs = 0;
2762                         txq = &txo->q;
2763                         while ((txcp = be_tx_compl_get(txo))) {
2764                                 num_wrbs +=
2765                                         be_tx_compl_process(adapter, txo,
2766                                                             txcp->end_index);
2767                                 cmpl++;
2768                         }
2769                         if (cmpl) {
2770                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2771                                 atomic_sub(num_wrbs, &txq->used);
2772                                 timeo = 0;
2773                         }
2774                         if (!be_is_tx_compl_pending(txo))
2775                                 pending_txqs--;
2776                 }
2777
2778                 if (pending_txqs == 0 || ++timeo > 10 ||
2779                     be_check_error(adapter, BE_ERROR_HW))
2780                         break;
2781
2782                 mdelay(1);
2783         } while (true);
2784
2785         /* Free enqueued TX that was never notified to HW */
2786         for_all_tx_queues(adapter, txo, i) {
2787                 txq = &txo->q;
2788
2789                 if (atomic_read(&txq->used)) {
2790                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2791                                  i, atomic_read(&txq->used));
2792                         notified_idx = txq->tail;
2793                         end_idx = txq->tail;
2794                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2795                                   txq->len);
2796                         /* Use the tx-compl process logic to handle requests
2797                          * that were not sent to the HW.
2798                          */
2799                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2800                         atomic_sub(num_wrbs, &txq->used);
2801                         BUG_ON(atomic_read(&txq->used));
2802                         txo->pend_wrb_cnt = 0;
2803                         /* Since hw was never notified of these requests,
2804                          * reset TXQ indices
2805                          */
2806                         txq->head = notified_idx;
2807                         txq->tail = notified_idx;
2808                 }
2809         }
2810 }
2811
2812 static void be_evt_queues_destroy(struct be_adapter *adapter)
2813 {
2814         struct be_eq_obj *eqo;
2815         int i;
2816
2817         for_all_evt_queues(adapter, eqo, i) {
2818                 if (eqo->q.created) {
2819                         be_eq_clean(eqo);
2820                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2821                         netif_napi_del(&eqo->napi);
2822                         free_cpumask_var(eqo->affinity_mask);
2823                 }
2824                 be_queue_free(adapter, &eqo->q);
2825         }
2826 }
2827
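/* Allocate and create one EQ per interrupt vector (bounded by the configured
 * RX/TX IRQ counts), register its NAPI context and record a NUMA-local CPU in
 * the affinity mask that is later used as the MSI-X affinity hint.
 */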
2828 static int be_evt_queues_create(struct be_adapter *adapter)
2829 {
2830         struct be_queue_info *eq;
2831         struct be_eq_obj *eqo;
2832         struct be_aic_obj *aic;
2833         int i, rc;
2834
2835         /* need enough EQs to service both RX and TX queues */
2836         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2837                                     max(adapter->cfg_num_rx_irqs,
2838                                         adapter->cfg_num_tx_irqs));
2839
2840         for_all_evt_queues(adapter, eqo, i) {
2841                 int numa_node = dev_to_node(&adapter->pdev->dev);
2842
2843                 aic = &adapter->aic_obj[i];
2844                 eqo->adapter = adapter;
2845                 eqo->idx = i;
2846                 aic->max_eqd = BE_MAX_EQD;
2847                 aic->enable = true;
2848
2849                 eq = &eqo->q;
2850                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2851                                     sizeof(struct be_eq_entry));
2852                 if (rc)
2853                         return rc;
2854
2855                 rc = be_cmd_eq_create(adapter, eqo);
2856                 if (rc)
2857                         return rc;
2858
2859                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2860                         return -ENOMEM;
2861                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2862                                 eqo->affinity_mask);
2863                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2864                                BE_NAPI_WEIGHT);
2865         }
2866         return 0;
2867 }
2868
2869 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2870 {
2871         struct be_queue_info *q;
2872
2873         q = &adapter->mcc_obj.q;
2874         if (q->created)
2875                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2876         be_queue_free(adapter, q);
2877
2878         q = &adapter->mcc_obj.cq;
2879         if (q->created)
2880                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2881         be_queue_free(adapter, q);
2882 }
2883
2884 /* Must be called only after TX qs are created as MCC shares TX EQ */
2885 static int be_mcc_queues_create(struct be_adapter *adapter)
2886 {
2887         struct be_queue_info *q, *cq;
2888
2889         cq = &adapter->mcc_obj.cq;
2890         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2891                            sizeof(struct be_mcc_compl)))
2892                 goto err;
2893
2894         /* Use the default EQ for MCC completions */
2895         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2896                 goto mcc_cq_free;
2897
2898         q = &adapter->mcc_obj.q;
2899         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2900                 goto mcc_cq_destroy;
2901
2902         if (be_cmd_mccq_create(adapter, q, cq))
2903                 goto mcc_q_free;
2904
2905         return 0;
2906
2907 mcc_q_free:
2908         be_queue_free(adapter, q);
2909 mcc_cq_destroy:
2910         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2911 mcc_cq_free:
2912         be_queue_free(adapter, cq);
2913 err:
2914         return -1;
2915 }
2916
2917 static void be_tx_queues_destroy(struct be_adapter *adapter)
2918 {
2919         struct be_queue_info *q;
2920         struct be_tx_obj *txo;
2921         u8 i;
2922
2923         for_all_tx_queues(adapter, txo, i) {
2924                 q = &txo->q;
2925                 if (q->created)
2926                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2927                 be_queue_free(adapter, q);
2928
2929                 q = &txo->cq;
2930                 if (q->created)
2931                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2932                 be_queue_free(adapter, q);
2933         }
2934 }
2935
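/* Create a completion queue and a WRB queue for each TX queue. When there are
 * fewer EQs than TXQs, several TXQs share one EQ; the XPS map of each TXQ
 * follows its EQ's CPU affinity mask.
 */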
2936 static int be_tx_qs_create(struct be_adapter *adapter)
2937 {
2938         struct be_queue_info *cq;
2939         struct be_tx_obj *txo;
2940         struct be_eq_obj *eqo;
2941         int status, i;
2942
2943         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2944
2945         for_all_tx_queues(adapter, txo, i) {
2946                 cq = &txo->cq;
2947                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2948                                         sizeof(struct be_eth_tx_compl));
2949                 if (status)
2950                         return status;
2951
2952                 u64_stats_init(&txo->stats.sync);
2953                 u64_stats_init(&txo->stats.sync_compl);
2954
2955                 /* If num_evt_qs is less than num_tx_qs, then more than
2956                  * one TXQ shares an EQ
2957                  */
2958                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2959                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2960                 if (status)
2961                         return status;
2962
2963                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2964                                         sizeof(struct be_eth_wrb));
2965                 if (status)
2966                         return status;
2967
2968                 status = be_cmd_txq_create(adapter, txo);
2969                 if (status)
2970                         return status;
2971
2972                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2973                                     eqo->idx);
2974         }
2975
2976         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2977                  adapter->num_tx_qs);
2978         return 0;
2979 }
2980
2981 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2982 {
2983         struct be_queue_info *q;
2984         struct be_rx_obj *rxo;
2985         int i;
2986
2987         for_all_rx_queues(adapter, rxo, i) {
2988                 q = &rxo->cq;
2989                 if (q->created)
2990                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2991                 be_queue_free(adapter, q);
2992         }
2993 }
2994
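/* Decide how many RX queues (RSS rings plus an optional default RXQ) this
 * function will use and create a completion queue for each, spreading the CQs
 * round-robin across the available EQs.
 */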
2995 static int be_rx_cqs_create(struct be_adapter *adapter)
2996 {
2997         struct be_queue_info *eq, *cq;
2998         struct be_rx_obj *rxo;
2999         int rc, i;
3000
3001         adapter->num_rss_qs =
3002                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3003
3004         /* We'll use RSS only if at least 2 RSS rings are supported. */
3005         if (adapter->num_rss_qs < 2)
3006                 adapter->num_rss_qs = 0;
3007
3008         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3009
3010         /* When the interface is not capable of RSS rings (and there is no
3011          * need to create a default RXQ) we'll still need one RXQ
3012          */
3013         if (adapter->num_rx_qs == 0)
3014                 adapter->num_rx_qs = 1;
3015
3016         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3017         for_all_rx_queues(adapter, rxo, i) {
3018                 rxo->adapter = adapter;
3019                 cq = &rxo->cq;
3020                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3021                                     sizeof(struct be_eth_rx_compl));
3022                 if (rc)
3023                         return rc;
3024
3025                 u64_stats_init(&rxo->stats.sync);
3026                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3027                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3028                 if (rc)
3029                         return rc;
3030         }
3031
3032         dev_info(&adapter->pdev->dev,
3033                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3034         return 0;
3035 }
3036
3037 static irqreturn_t be_intx(int irq, void *dev)
3038 {
3039         struct be_eq_obj *eqo = dev;
3040         struct be_adapter *adapter = eqo->adapter;
3041         int num_evts = 0;
3042
3043         /* IRQ is not expected when NAPI is scheduled as the EQ
3044          * will not be armed.
3045          * But, this can happen on Lancer INTx where it takes
3046          * a while to de-assert INTx or in BE2 where occasionally
3047          * an interrupt may be raised even when EQ is unarmed.
3048          * If NAPI is already scheduled, then counting & notifying
3049          * events will orphan them.
3050          */
3051         if (napi_schedule_prep(&eqo->napi)) {
3052                 num_evts = events_get(eqo);
3053                 __napi_schedule(&eqo->napi);
3054                 if (num_evts)
3055                         eqo->spurious_intr = 0;
3056         }
3057         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3058
3059         /* Return IRQ_HANDLED only for the first spurious intr
3060          * after a valid intr to stop the kernel from branding
3061          * this irq as a bad one!
3062          */
3063         if (num_evts || eqo->spurious_intr++ == 0)
3064                 return IRQ_HANDLED;
3065         else
3066                 return IRQ_NONE;
3067 }
3068
3069 static irqreturn_t be_msix(int irq, void *dev)
3070 {
3071         struct be_eq_obj *eqo = dev;
3072
3073         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3074         napi_schedule(&eqo->napi);
3075         return IRQ_HANDLED;
3076 }
3077
3078 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3079 {
3080         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3081 }
3082
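/* RX poll loop shared by NAPI and busy-poll: consume up to 'budget'
 * completions, discarding flush, partial-DMA and wrong-port completions, and
 * hand the rest to GRO or the regular receive path. If any work was done the
 * CQ is re-armed and the RXQ is refilled unless it is in post_starved state.
 */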
3083 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3084                          int budget, int polling)
3085 {
3086         struct be_adapter *adapter = rxo->adapter;
3087         struct be_queue_info *rx_cq = &rxo->cq;
3088         struct be_rx_compl_info *rxcp;
3089         u32 work_done;
3090         u32 frags_consumed = 0;
3091
3092         for (work_done = 0; work_done < budget; work_done++) {
3093                 rxcp = be_rx_compl_get(rxo);
3094                 if (!rxcp)
3095                         break;
3096
3097                 /* Is it a flush compl that has no data */
3098                 if (unlikely(rxcp->num_rcvd == 0))
3099                         goto loop_continue;
3100
3101                 /* Discard compls with partial DMA (Lancer B0) */
3102                 if (unlikely(!rxcp->pkt_size)) {
3103                         be_rx_compl_discard(rxo, rxcp);
3104                         goto loop_continue;
3105                 }
3106
3107                 /* On BE drop pkts that arrive due to imperfect filtering in
3108                  * promiscuous mode on some SKUs
3109                  */
3110                 if (unlikely(rxcp->port != adapter->port_num &&
3111                              !lancer_chip(adapter))) {
3112                         be_rx_compl_discard(rxo, rxcp);
3113                         goto loop_continue;
3114                 }
3115
3116                 /* Don't do gro when we're busy_polling */
3117                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3118                         be_rx_compl_process_gro(rxo, napi, rxcp);
3119                 else
3120                         be_rx_compl_process(rxo, napi, rxcp);
3121
3122 loop_continue:
3123                 frags_consumed += rxcp->num_rcvd;
3124                 be_rx_stats_update(rxo, rxcp);
3125         }
3126
3127         if (work_done) {
3128                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3129
3130                 /* When an rx-obj gets into post_starved state, just
3131                  * let be_worker do the posting.
3132                  */
3133                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3134                     !rxo->rx_post_starved)
3135                         be_post_rx_frags(rxo, GFP_ATOMIC,
3136                                          max_t(u32, MAX_RX_POST,
3137                                                frags_consumed));
3138         }
3139
3140         return work_done;
3141 }
3142
3143 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3144 {
3145         switch (status) {
3146         case BE_TX_COMP_HDR_PARSE_ERR:
3147                 tx_stats(txo)->tx_hdr_parse_err++;
3148                 break;
3149         case BE_TX_COMP_NDMA_ERR:
3150                 tx_stats(txo)->tx_dma_err++;
3151                 break;
3152         case BE_TX_COMP_ACL_ERR:
3153                 tx_stats(txo)->tx_spoof_check_err++;
3154                 break;
3155         }
3156 }
3157
3158 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3159 {
3160         switch (status) {
3161         case LANCER_TX_COMP_LSO_ERR:
3162                 tx_stats(txo)->tx_tso_err++;
3163                 break;
3164         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3165         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3166                 tx_stats(txo)->tx_spoof_check_err++;
3167                 break;
3168         case LANCER_TX_COMP_QINQ_ERR:
3169                 tx_stats(txo)->tx_qinq_err++;
3170                 break;
3171         case LANCER_TX_COMP_PARITY_ERR:
3172                 tx_stats(txo)->tx_internal_parity_err++;
3173                 break;
3174         case LANCER_TX_COMP_DMA_ERR:
3175                 tx_stats(txo)->tx_dma_err++;
3176                 break;
3177         }
3178 }
3179
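/* Reap TX completions for one TX queue: free the completed WRBs, update the
 * per-error-type stats, re-arm the CQ and wake the netdev sub-queue if it was
 * stopped for lack of WRBs.
 */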
3180 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3181                           int idx)
3182 {
3183         int num_wrbs = 0, work_done = 0;
3184         struct be_tx_compl_info *txcp;
3185
3186         while ((txcp = be_tx_compl_get(txo))) {
3187                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3188                 work_done++;
3189
3190                 if (txcp->status) {
3191                         if (lancer_chip(adapter))
3192                                 lancer_update_tx_err(txo, txcp->status);
3193                         else
3194                                 be_update_tx_err(txo, txcp->status);
3195                 }
3196         }
3197
3198         if (work_done) {
3199                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3200                 atomic_sub(num_wrbs, &txo->q.used);
3201
3202                 /* As TX WRBs have been freed up, wake up the netdev queue
3203                  * if it was stopped due to lack of TX WRBs. */
3204                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3205                     be_can_txq_wake(txo)) {
3206                         netif_wake_subqueue(adapter->netdev, idx);
3207                 }
3208
3209                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3210                 tx_stats(txo)->tx_compl += work_done;
3211                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3212         }
3213 }
3214
3215 #ifdef CONFIG_NET_RX_BUSY_POLL
3216 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3217 {
3218         bool status = true;
3219
3220         spin_lock(&eqo->lock); /* BH is already disabled */
3221         if (eqo->state & BE_EQ_LOCKED) {
3222                 WARN_ON(eqo->state & BE_EQ_NAPI);
3223                 eqo->state |= BE_EQ_NAPI_YIELD;
3224                 status = false;
3225         } else {
3226                 eqo->state = BE_EQ_NAPI;
3227         }
3228         spin_unlock(&eqo->lock);
3229         return status;
3230 }
3231
3232 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3233 {
3234         spin_lock(&eqo->lock); /* BH is already disabled */
3235
3236         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3237         eqo->state = BE_EQ_IDLE;
3238
3239         spin_unlock(&eqo->lock);
3240 }
3241
3242 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3243 {
3244         bool status = true;
3245
3246         spin_lock_bh(&eqo->lock);
3247         if (eqo->state & BE_EQ_LOCKED) {
3248                 eqo->state |= BE_EQ_POLL_YIELD;
3249                 status = false;
3250         } else {
3251                 eqo->state |= BE_EQ_POLL;
3252         }
3253         spin_unlock_bh(&eqo->lock);
3254         return status;
3255 }
3256
3257 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3258 {
3259         spin_lock_bh(&eqo->lock);
3260
3261         WARN_ON(eqo->state & (BE_EQ_NAPI));
3262         eqo->state = BE_EQ_IDLE;
3263
3264         spin_unlock_bh(&eqo->lock);
3265 }
3266
3267 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3268 {
3269         spin_lock_init(&eqo->lock);
3270         eqo->state = BE_EQ_IDLE;
3271 }
3272
3273 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3274 {
3275         local_bh_disable();
3276
3277         /* It's enough to just acquire napi lock on the eqo to stop
3278          * be_busy_poll() from processing any queues.
3279          */
3280         while (!be_lock_napi(eqo))
3281                 mdelay(1);
3282
3283         local_bh_enable();
3284 }
3285
3286 #else /* CONFIG_NET_RX_BUSY_POLL */
3287
3288 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3289 {
3290         return true;
3291 }
3292
3293 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3294 {
3295 }
3296
3297 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3298 {
3299         return false;
3300 }
3301
3302 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3303 {
3304 }
3305
3306 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3307 {
3308 }
3309
3310 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3311 {
3312 }
3313 #endif /* CONFIG_NET_RX_BUSY_POLL */
3314
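/* NAPI handler for one EQ: process TX completions for all TXQs on this EQ,
 * then RX up to the budget (skipped when busy-poll holds the EQ lock), service
 * the MCC queue if this is its EQ, and finally either re-arm the EQ (when work
 * done was below the budget) or just ack the events and stay in polling mode.
 */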
3315 int be_poll(struct napi_struct *napi, int budget)
3316 {
3317         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3318         struct be_adapter *adapter = eqo->adapter;
3319         int max_work = 0, work, i, num_evts;
3320         struct be_rx_obj *rxo;
3321         struct be_tx_obj *txo;
3322         u32 mult_enc = 0;
3323
3324         num_evts = events_get(eqo);
3325
3326         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3327                 be_process_tx(adapter, txo, i);
3328
3329         if (be_lock_napi(eqo)) {
3330                 /* This loop will iterate twice for EQ0 in which
3331                  * completions of the last RXQ (default one) are also processed.
3332                  * For other EQs the loop iterates only once.
3333                  */
3334                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3335                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3336                         max_work = max(work, max_work);
3337                 }
3338                 be_unlock_napi(eqo);
3339         } else {
3340                 max_work = budget;
3341         }
3342
3343         if (is_mcc_eqo(eqo))
3344                 be_process_mcc(adapter);
3345
3346         if (max_work < budget) {
3347                 napi_complete(napi);
3348
3349                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3350                  * delay via a delay multiplier encoding value
3351                  */
3352                 if (skyhawk_chip(adapter))
3353                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3354
3355                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3356                              mult_enc);
3357         } else {
3358                 /* As we'll continue in polling mode, count and clear events */
3359                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3360         }
3361         return max_work;
3362 }
3363
3364 #ifdef CONFIG_NET_RX_BUSY_POLL
3365 static int be_busy_poll(struct napi_struct *napi)
3366 {
3367         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3368         struct be_adapter *adapter = eqo->adapter;
3369         struct be_rx_obj *rxo;
3370         int i, work = 0;
3371
3372         if (!be_lock_busy_poll(eqo))
3373                 return LL_FLUSH_BUSY;
3374
3375         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3376                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3377                 if (work)
3378                         break;
3379         }
3380
3381         be_unlock_busy_poll(eqo);
3382         return work;
3383 }
3384 #endif
3385
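/* Check the adapter for unrecoverable errors: the SLIPORT status registers on
 * Lancer, or the UE status registers (filtered by their mask registers) on
 * BE/Skyhawk. Failing blocks are logged and the adapter error state is set
 * where appropriate.
 */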
3386 void be_detect_error(struct be_adapter *adapter)
3387 {
3388         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3389         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3390         u32 i;
3391         struct device *dev = &adapter->pdev->dev;
3392
3393         if (be_check_error(adapter, BE_ERROR_HW))
3394                 return;
3395
3396         if (lancer_chip(adapter)) {
3397                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3398                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3399                         be_set_error(adapter, BE_ERROR_UE);
3400                         sliport_err1 = ioread32(adapter->db +
3401                                                 SLIPORT_ERROR1_OFFSET);
3402                         sliport_err2 = ioread32(adapter->db +
3403                                                 SLIPORT_ERROR2_OFFSET);
3404                         /* Do not log error messages if it's a FW reset */
3405                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3406                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3407                                 dev_info(dev, "Firmware update in progress\n");
3408                         } else {
3409                                 dev_err(dev, "Error detected in the card\n");
3410                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3411                                         sliport_status);
3412                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3413                                         sliport_err1);
3414                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3415                                         sliport_err2);
3416                         }
3417                 }
3418         } else {
3419                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3420                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3421                 ue_lo_mask = ioread32(adapter->pcicfg +
3422                                       PCICFG_UE_STATUS_LOW_MASK);
3423                 ue_hi_mask = ioread32(adapter->pcicfg +
3424                                       PCICFG_UE_STATUS_HI_MASK);
3425
3426                 ue_lo = (ue_lo & ~ue_lo_mask);
3427                 ue_hi = (ue_hi & ~ue_hi_mask);
3428
3429                 /* On certain platforms BE hardware can indicate spurious UEs.
3430                  * Allow HW to stop working completely in case of a real UE.
3431                  * Hence hw_error is not set merely on UE detection.
3432                  */
3433
3434                 if (ue_lo || ue_hi) {
3435                         dev_err(dev, "Error detected in the adapter");
3436                         if (skyhawk_chip(adapter))
3437                                 be_set_error(adapter, BE_ERROR_UE);
3438
3439                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3440                                 if (ue_lo & 1)
3441                                         dev_err(dev, "UE: %s bit set\n",
3442                                                 ue_status_low_desc[i]);
3443                         }
3444                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3445                                 if (ue_hi & 1)
3446                                         dev_err(dev, "UE: %s bit set\n",
3447                                                 ue_status_hi_desc[i]);
3448                         }
3449                 }
3450         }
3451 }
3452
3453 static void be_msix_disable(struct be_adapter *adapter)
3454 {
3455         if (msix_enabled(adapter)) {
3456                 pci_disable_msix(adapter->pdev);
3457                 adapter->num_msix_vec = 0;
3458                 adapter->num_msix_roce_vec = 0;
3459         }
3460 }
3461
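/* Request MSI-X vectors. When RoCE is supported the requested count covers
 * both NIC and RoCE EQs and the granted vectors are split between the two;
 * otherwise only the configured NIC RX/TX IRQ requirement is requested.
 */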
3462 static int be_msix_enable(struct be_adapter *adapter)
3463 {
3464         unsigned int i, max_roce_eqs;
3465         struct device *dev = &adapter->pdev->dev;
3466         int num_vec;
3467
3468         /* If RoCE is supported, program the max number of vectors that
3469          * could be used for NIC and RoCE, else, just program the number
3470          * we'll use initially.
3471          */
3472         if (be_roce_supported(adapter)) {
3473                 max_roce_eqs =
3474                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3475                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3476                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3477         } else {
3478                 num_vec = max(adapter->cfg_num_rx_irqs,
3479                               adapter->cfg_num_tx_irqs);
3480         }
3481
3482         for (i = 0; i < num_vec; i++)
3483                 adapter->msix_entries[i].entry = i;
3484
3485         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3486                                         MIN_MSIX_VECTORS, num_vec);
3487         if (num_vec < 0)
3488                 goto fail;
3489
3490         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3491                 adapter->num_msix_roce_vec = num_vec / 2;
3492                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3493                          adapter->num_msix_roce_vec);
3494         }
3495
3496         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3497
3498         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3499                  adapter->num_msix_vec);
3500         return 0;
3501
3502 fail:
3503         dev_warn(dev, "MSIx enable failed\n");
3504
3505         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3506         if (be_virtfn(adapter))
3507                 return num_vec;
3508         return 0;
3509 }
3510
3511 static inline int be_msix_vec_get(struct be_adapter *adapter,
3512                                   struct be_eq_obj *eqo)
3513 {
3514         return adapter->msix_entries[eqo->msix_idx].vector;
3515 }
3516
3517 static int be_msix_register(struct be_adapter *adapter)
3518 {
3519         struct net_device *netdev = adapter->netdev;
3520         struct be_eq_obj *eqo;
3521         int status, i, vec;
3522
3523         for_all_evt_queues(adapter, eqo, i) {
3524                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3525                 vec = be_msix_vec_get(adapter, eqo);
3526                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3527                 if (status)
3528                         goto err_msix;
3529
3530                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3531         }
3532
3533         return 0;
3534 err_msix:
3535         for (i--; i >= 0; i--) {
3536                 eqo = &adapter->eq_obj[i];
3537                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3538         }
3539         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3540                  status);
3541         be_msix_disable(adapter);
3542         return status;
3543 }
3544
3545 static int be_irq_register(struct be_adapter *adapter)
3546 {
3547         struct net_device *netdev = adapter->netdev;
3548         int status;
3549
3550         if (msix_enabled(adapter)) {
3551                 status = be_msix_register(adapter);
3552                 if (status == 0)
3553                         goto done;
3554                 /* INTx is not supported for VF */
3555                 if (be_virtfn(adapter))
3556                         return status;
3557         }
3558
3559         /* INTx: only the first EQ is used */
3560         netdev->irq = adapter->pdev->irq;
3561         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3562                              &adapter->eq_obj[0]);
3563         if (status) {
3564                 dev_err(&adapter->pdev->dev,
3565                         "INTx request IRQ failed - err %d\n", status);
3566                 return status;
3567         }
3568 done:
3569         adapter->isr_registered = true;
3570         return 0;
3571 }
3572
3573 static void be_irq_unregister(struct be_adapter *adapter)
3574 {
3575         struct net_device *netdev = adapter->netdev;
3576         struct be_eq_obj *eqo;
3577         int i, vec;
3578
3579         if (!adapter->isr_registered)
3580                 return;
3581
3582         /* INTx */
3583         if (!msix_enabled(adapter)) {
3584                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3585                 goto done;
3586         }
3587
3588         /* MSIx */
3589         for_all_evt_queues(adapter, eqo, i) {
3590                 vec = be_msix_vec_get(adapter, eqo);
3591                 irq_set_affinity_hint(vec, NULL);
3592                 free_irq(vec, eqo);
3593         }
3594
3595 done:
3596         adapter->isr_registered = false;
3597 }
3598
3599 static void be_rx_qs_destroy(struct be_adapter *adapter)
3600 {
3601         struct rss_info *rss = &adapter->rss_info;
3602         struct be_queue_info *q;
3603         struct be_rx_obj *rxo;
3604         int i;
3605
3606         for_all_rx_queues(adapter, rxo, i) {
3607                 q = &rxo->q;
3608                 if (q->created) {
3609                         /* If RXQs are destroyed while in an "out of buffer"
3610                          * state, there is a possibility of an HW stall on
3611                          * Lancer. So, post 64 buffers to each queue to relieve
3612                          * the "out of buffer" condition.
3613                          * Make sure there's space in the RXQ before posting.
3614                          */
3615                         if (lancer_chip(adapter)) {
3616                                 be_rx_cq_clean(rxo);
3617                                 if (atomic_read(&q->used) == 0)
3618                                         be_post_rx_frags(rxo, GFP_KERNEL,
3619                                                          MAX_RX_POST);
3620                         }
3621
3622                         be_cmd_rxq_destroy(adapter, q);
3623                         be_rx_cq_clean(rxo);
3624                         be_rxq_clean(rxo);
3625                 }
3626                 be_queue_free(adapter, q);
3627         }
3628
3629         if (rss->rss_flags) {
3630                 rss->rss_flags = RSS_ENABLE_NONE;
3631                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3632                                   128, rss->rss_hkey);
3633         }
3634 }
3635
3636 static void be_disable_if_filters(struct be_adapter *adapter)
3637 {
3638         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3639         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3640             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3641                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3642                 eth_zero_addr(adapter->dev_mac);
3643         }
3644
3645         be_clear_uc_list(adapter);
3646         be_clear_mc_list(adapter);
3647
3648         /* The IFACE flags are enabled in the open path and cleared
3649          * in the close path. When a VF gets detached from the host and
3650          * assigned to a VM the following happens:
3651          *      - VF's IFACE flags get cleared in the detach path
3652          *      - IFACE create is issued by the VF in the attach path
3653          * Due to a bug in the BE3/Skyhawk-R FW
3654          * (Lancer FW doesn't have the bug), the IFACE capability flags
3655          * specified along with the IFACE create cmd issued by a VF are not
3656          * honoured by FW.  As a consequence, if a *new* driver
3657          * (that enables/disables IFACE flags in open/close)
3658          * is loaded in the host and an *old* driver is used by a VM/VF,
3659          * the IFACE gets created *without* the needed flags.
3660          * To avoid this, disable RX-filter flags only for Lancer.
3661          */
3662         if (lancer_chip(adapter)) {
3663                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3664                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3665         }
3666 }
3667
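/* ndo_stop handler: flush pending config work, disable RX filters, NAPI and
 * the async MCC, drain TX completions, destroy the RX queues and finally
 * quiesce and release the IRQs.
 */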
3668 static int be_close(struct net_device *netdev)
3669 {
3670         struct be_adapter *adapter = netdev_priv(netdev);
3671         struct be_eq_obj *eqo;
3672         int i;
3673
3674         /* This protection is needed as be_close() may be called even when the
3675          * adapter is in a cleared state (after an EEH permanent failure)
3676          */
3677         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3678                 return 0;
3679
3680         /* Before attempting cleanup ensure all the pending cmds in the
3681          * config_wq have finished execution
3682          */
3683         flush_workqueue(be_wq);
3684
3685         be_disable_if_filters(adapter);
3686
3687         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3688                 for_all_evt_queues(adapter, eqo, i) {
3689                         napi_disable(&eqo->napi);
3690                         be_disable_busy_poll(eqo);
3691                 }
3692                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3693         }
3694
3695         be_async_mcc_disable(adapter);
3696
3697         /* Wait for all pending tx completions to arrive so that
3698          * all tx skbs are freed.
3699          */
3700         netif_tx_disable(netdev);
3701         be_tx_compl_clean(adapter);
3702
3703         be_rx_qs_destroy(adapter);
3704
3705         for_all_evt_queues(adapter, eqo, i) {
3706                 if (msix_enabled(adapter))
3707                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3708                 else
3709                         synchronize_irq(netdev->irq);
3710                 be_eq_clean(eqo);
3711         }
3712
3713         be_irq_unregister(adapter);
3714
3715         return 0;
3716 }
3717
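/* Create the RX WRB queues (a default RXQ if needed plus the RSS rings),
 * program the RSS indirection table and hash key when more than one ring
 * exists, and post the initial receive fragments.
 */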
3718 static int be_rx_qs_create(struct be_adapter *adapter)
3719 {
3720         struct rss_info *rss = &adapter->rss_info;
3721         u8 rss_key[RSS_HASH_KEY_LEN];
3722         struct be_rx_obj *rxo;
3723         int rc, i, j;
3724
3725         for_all_rx_queues(adapter, rxo, i) {
3726                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3727                                     sizeof(struct be_eth_rx_d));
3728                 if (rc)
3729                         return rc;
3730         }
3731
3732         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3733                 rxo = default_rxo(adapter);
3734                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3735                                        rx_frag_size, adapter->if_handle,
3736                                        false, &rxo->rss_id);
3737                 if (rc)
3738                         return rc;
3739         }
3740
3741         for_all_rss_queues(adapter, rxo, i) {
3742                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3743                                        rx_frag_size, adapter->if_handle,
3744                                        true, &rxo->rss_id);
3745                 if (rc)
3746                         return rc;
3747         }
3748
3749         if (be_multi_rxq(adapter)) {
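                /* Fill the indirection table round-robin with the rss_id of
                 * each RSS ring; e.g. with 4 rings the table entries map to
                 * rings 0,1,2,3,0,1,2,3,... until the table is full.
                 */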
3750                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3751                         for_all_rss_queues(adapter, rxo, i) {
3752                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3753                                         break;
3754                                 rss->rsstable[j + i] = rxo->rss_id;
3755                                 rss->rss_queue[j + i] = i;
3756                         }
3757                 }
3758                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3759                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3760
3761                 if (!BEx_chip(adapter))
3762                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3763                                 RSS_ENABLE_UDP_IPV6;
3764
3765                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3766                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3767                                        RSS_INDIR_TABLE_LEN, rss_key);
3768                 if (rc) {
3769                         rss->rss_flags = RSS_ENABLE_NONE;
3770                         return rc;
3771                 }
3772
3773                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3774         } else {
3775                 /* Disable RSS, if only default RX Q is created */
3776                 rss->rss_flags = RSS_ENABLE_NONE;
3777         }
3778
3779
3780         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3781          * which is a queue empty condition
3782          */
3783         for_all_rx_queues(adapter, rxo, i)
3784                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3785
3786         return 0;
3787 }
3788
3789 static int be_enable_if_filters(struct be_adapter *adapter)
3790 {
3791         int status;
3792
3793         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3794         if (status)
3795                 return status;
3796
3797         /* Normally this condition is true as the ->dev_mac is zeroed.
3798          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3799          * subsequent be_dev_mac_add() can fail (after fresh boot)
3800          */
3801         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3802                 int old_pmac_id = -1;
3803
3804                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3805                 if (!is_zero_ether_addr(adapter->dev_mac))
3806                         old_pmac_id = adapter->pmac_id[0];
3807
3808                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3809                 if (status)
3810                         return status;
3811
3812                 /* Delete the old programmed MAC as we successfully programmed
3813                  * a new MAC
3814                  */
3815                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3816                         be_dev_mac_del(adapter, old_pmac_id);
3817
3818                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3819         }
3820
3821         if (adapter->vlans_added)
3822                 be_vid_config(adapter);
3823
3824         __be_set_rx_mode(adapter);
3825
3826         return 0;
3827 }
3828
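/* ndo_open handler: create and fill the RX queues, enable the interface
 * filters, register IRQs, arm all CQs and EQs, enable NAPI and the async MCC,
 * then report the link state and start the TX queues.
 */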
3829 static int be_open(struct net_device *netdev)
3830 {
3831         struct be_adapter *adapter = netdev_priv(netdev);
3832         struct be_eq_obj *eqo;
3833         struct be_rx_obj *rxo;
3834         struct be_tx_obj *txo;
3835         u8 link_status;
3836         int status, i;
3837
3838         status = be_rx_qs_create(adapter);
3839         if (status)
3840                 goto err;
3841
3842         status = be_enable_if_filters(adapter);
3843         if (status)
3844                 goto err;
3845
3846         status = be_irq_register(adapter);
3847         if (status)
3848                 goto err;
3849
3850         for_all_rx_queues(adapter, rxo, i)
3851                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3852
3853         for_all_tx_queues(adapter, txo, i)
3854                 be_cq_notify(adapter, txo->cq.id, true, 0);
3855
3856         be_async_mcc_enable(adapter);
3857
3858         for_all_evt_queues(adapter, eqo, i) {
3859                 napi_enable(&eqo->napi);
3860                 be_enable_busy_poll(eqo);
3861                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3862         }
3863         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3864
3865         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3866         if (!status)
3867                 be_link_status_update(adapter, link_status);
3868
3869         netif_tx_start_all_queues(netdev);
3870         if (skyhawk_chip(adapter))
3871                 udp_tunnel_get_rx_info(netdev);
3872
3873         return 0;
3874 err:
3875         be_close(adapter->netdev);
3876         return -EIO;
3877 }
3878
3879 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3880 {
3881         u32 addr;
3882
3883         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3884
3885         mac[5] = (u8)(addr & 0xFF);
3886         mac[4] = (u8)((addr >> 8) & 0xFF);
3887         mac[3] = (u8)((addr >> 16) & 0xFF);
3888         /* Use the OUI from the current MAC address */
3889         memcpy(mac, adapter->netdev->dev_addr, 3);
3890 }
3891
3892 /*
3893  * Generate a seed MAC address from the PF MAC Address using jhash.
3894  * MAC addresses for VFs are assigned incrementally starting from the seed.
3895  * These addresses are programmed in the ASIC by the PF and the VF driver
3896  * queries for the MAC address during its probe.
3897  */
3898 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3899 {
3900         u32 vf;
3901         int status = 0;
3902         u8 mac[ETH_ALEN];
3903         struct be_vf_cfg *vf_cfg;
3904
3905         be_vf_eth_addr_generate(adapter, mac);
3906
3907         for_all_vfs(adapter, vf_cfg, vf) {
3908                 if (BEx_chip(adapter))
3909                         status = be_cmd_pmac_add(adapter, mac,
3910                                                  vf_cfg->if_handle,
3911                                                  &vf_cfg->pmac_id, vf + 1);
3912                 else
3913                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3914                                                 vf + 1);
3915
3916                 if (status)
3917                         dev_err(&adapter->pdev->dev,
3918                                 "Mac address assignment failed for VF %d\n",
3919                                 vf);
3920                 else
3921                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3922
3923                 mac[5] += 1;
3924         }
3925         return status;
3926 }
3927
3928 static int be_vfs_mac_query(struct be_adapter *adapter)
3929 {
3930         int status, vf;
3931         u8 mac[ETH_ALEN];
3932         struct be_vf_cfg *vf_cfg;
3933
3934         for_all_vfs(adapter, vf_cfg, vf) {
3935                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3936                                                mac, vf_cfg->if_handle,
3937                                                false, vf+1);
3938                 if (status)
3939                         return status;
3940                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3941         }
3942         return 0;
3943 }
3944
3945 static void be_vf_clear(struct be_adapter *adapter)
3946 {
3947         struct be_vf_cfg *vf_cfg;
3948         u32 vf;
3949
3950         if (pci_vfs_assigned(adapter->pdev)) {
3951                 dev_warn(&adapter->pdev->dev,
3952                          "VFs are assigned to VMs: not disabling VFs\n");
3953                 goto done;
3954         }
3955
3956         pci_disable_sriov(adapter->pdev);
3957
3958         for_all_vfs(adapter, vf_cfg, vf) {
3959                 if (BEx_chip(adapter))
3960                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3961                                         vf_cfg->pmac_id, vf + 1);
3962                 else
3963                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3964                                        vf + 1);
3965
3966                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3967         }
3968
3969         if (BE3_chip(adapter))
3970                 be_cmd_set_hsw_config(adapter, 0, 0,
3971                                       adapter->if_handle,
3972                                       PORT_FWD_TYPE_PASSTHRU, 0);
3973 done:
3974         kfree(adapter->vf_cfg);
3975         adapter->num_vfs = 0;
3976         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3977 }
3978
3979 static void be_clear_queues(struct be_adapter *adapter)
3980 {
3981         be_mcc_queues_destroy(adapter);
3982         be_rx_cqs_destroy(adapter);
3983         be_tx_queues_destroy(adapter);
3984         be_evt_queues_destroy(adapter);
3985 }
3986
3987 static void be_cancel_worker(struct be_adapter *adapter)
3988 {
3989         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3990                 cancel_delayed_work_sync(&adapter->work);
3991                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3992         }
3993 }
3994
3995 static void be_cancel_err_detection(struct be_adapter *adapter)
3996 {
3997         struct be_error_recovery *err_rec = &adapter->error_recovery;
3998
3999         if (!be_err_recovery_workq)
4000                 return;
4001
4002         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
4003                 cancel_delayed_work_sync(&err_rec->err_detection_work);
4004                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
4005         }
4006 }
4007
4008 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4009 {
4010         struct net_device *netdev = adapter->netdev;
4011
4012         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4013                 be_cmd_manage_iface(adapter, adapter->if_handle,
4014                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4015
4016         if (adapter->vxlan_port)
4017                 be_cmd_set_vxlan_port(adapter, 0);
4018
4019         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4020         adapter->vxlan_port = 0;
4021
4022         netdev->hw_enc_features = 0;
4023         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4024         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
4025 }
4026
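/* Compute the per-VF resource template (queue counts, IFACE capability flags,
 * MAC/VLAN/IFACE/MCCQ counts) that is used when re-provisioning SR-IOV
 * resources, splitting the PF pool among the PF and num_vfs VFs.
 */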
4027 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4028                                 struct be_resources *vft_res)
4029 {
4030         struct be_resources res = adapter->pool_res;
4031         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4032         struct be_resources res_mod = {0};
4033         u16 num_vf_qs = 1;
4034
4035         /* Distribute the queue resources among the PF and its VFs */
4036         if (num_vfs) {
4037                 /* Divide the rx queues evenly among the VFs and the PF, capped
4038                  * at VF-EQ-count. Any remainder queues belong to the PF.
4039                  */
4040                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4041                                 res.max_rss_qs / (num_vfs + 1));
4042
4043                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4044                  * RSS tables per port. Provide RSS on VFs only if the number of
4045                  * VFs requested is less than its PF pool's RSS table limit.
4046                  */
4047                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4048                         num_vf_qs = 1;
4049         }
4050
4051         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4052          * which are modifiable using SET_PROFILE_CONFIG cmd.
4053          */
4054         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4055                                   RESOURCE_MODIFIABLE, 0);
4056
4057         /* If RSS IFACE capability flags are modifiable for a VF, set the
4058          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4059          * more than 1 RSSQ is available for a VF.
4060          * Otherwise, provision only 1 queue pair for VF.
4061          */
4062         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4063                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4064                 if (num_vf_qs > 1) {
4065                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4066                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4067                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4068                 } else {
4069                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4070                                              BE_IF_FLAGS_DEFQ_RSS);
4071                 }
4072         } else {
4073                 num_vf_qs = 1;
4074         }
4075
4076         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4077                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4079         }
4080
4081         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4082         vft_res->max_rx_qs = num_vf_qs;
4083         vft_res->max_rss_qs = num_vf_qs;
4084         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4085         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4086
4087         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4088          * among the PF and its VFs, if the fields are changeable
4089          */
4090         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4091                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4092
4093         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4094                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4095
4096         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4097                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4098
4099         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4100                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4101 }
4102
4103 static void be_if_destroy(struct be_adapter *adapter)
4104 {
4105         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4106
4107         kfree(adapter->pmac_id);
4108         adapter->pmac_id = NULL;
4109
4110         kfree(adapter->mc_list);
4111         adapter->mc_list = NULL;
4112
4113         kfree(adapter->uc_list);
4114         adapter->uc_list = NULL;
4115 }
4116
4117 static int be_clear(struct be_adapter *adapter)
4118 {
4119         struct pci_dev *pdev = adapter->pdev;
4120         struct  be_resources vft_res = {0};
4121
4122         be_cancel_worker(adapter);
4123
4124         flush_workqueue(be_wq);
4125
4126         if (sriov_enabled(adapter))
4127                 be_vf_clear(adapter);
4128
4129         /* Re-configure FW to distribute resources evenly across max-supported
4130          * number of VFs, only when VFs are not already enabled.
4131          */
4132         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4133             !pci_vfs_assigned(pdev)) {
4134                 be_calculate_vf_res(adapter,
4135                                     pci_sriov_get_totalvfs(pdev),
4136                                     &vft_res);
4137                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4138                                         pci_sriov_get_totalvfs(pdev),
4139                                         &vft_res);
4140         }
4141
4142         be_disable_vxlan_offloads(adapter);
4143
4144         be_if_destroy(adapter);
4145
4146         be_clear_queues(adapter);
4147
4148         be_msix_disable(adapter);
4149         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4150         return 0;
4151 }
4152
4153 static int be_vfs_if_create(struct be_adapter *adapter)
4154 {
4155         struct be_resources res = {0};
4156         u32 cap_flags, en_flags, vf;
4157         struct be_vf_cfg *vf_cfg;
4158         int status;
4159
4160         /* If a FW profile exists, then cap_flags are updated */
4161         cap_flags = BE_VF_IF_EN_FLAGS;
4162
4163         for_all_vfs(adapter, vf_cfg, vf) {
4164                 if (!BE3_chip(adapter)) {
4165                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4166                                                            ACTIVE_PROFILE_TYPE,
4167                                                            RESOURCE_LIMITS,
4168                                                            vf + 1);
4169                         if (!status) {
4170                                 cap_flags = res.if_cap_flags;
4171                                 /* Prevent VFs from enabling VLAN promiscuous
4172                                  * mode
4173                                  */
4174                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4175                         }
4176                 }
4177
4178                 /* PF should enable IF flags during proxy if_create call */
4179                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4180                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4181                                           &vf_cfg->if_handle, vf + 1);
4182                 if (status)
4183                         return status;
4184         }
4185
4186         return 0;
4187 }
4188
4189 static int be_vf_setup_init(struct be_adapter *adapter)
4190 {
4191         struct be_vf_cfg *vf_cfg;
4192         int vf;
4193
4194         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4195                                   GFP_KERNEL);
4196         if (!adapter->vf_cfg)
4197                 return -ENOMEM;
4198
4199         for_all_vfs(adapter, vf_cfg, vf) {
4200                 vf_cfg->if_handle = -1;
4201                 vf_cfg->pmac_id = -1;
4202         }
4203         return 0;
4204 }
4205
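/* Bring up SR-IOV VFs: reuse the existing VF interfaces and MACs when VFs are
 * already enabled, otherwise create the VF interfaces and assign MAC
 * addresses; then grant the FILTMGMT privilege, query spoof-check settings,
 * enable each VF and finally call pci_enable_sriov() (plus VEB setup on BE3).
 */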
4206 static int be_vf_setup(struct be_adapter *adapter)
4207 {
4208         struct device *dev = &adapter->pdev->dev;
4209         struct be_vf_cfg *vf_cfg;
4210         int status, old_vfs, vf;
4211         bool spoofchk;
4212
4213         old_vfs = pci_num_vf(adapter->pdev);
4214
4215         status = be_vf_setup_init(adapter);
4216         if (status)
4217                 goto err;
4218
4219         if (old_vfs) {
4220                 for_all_vfs(adapter, vf_cfg, vf) {
4221                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4222                         if (status)
4223                                 goto err;
4224                 }
4225
4226                 status = be_vfs_mac_query(adapter);
4227                 if (status)
4228                         goto err;
4229         } else {
4230                 status = be_vfs_if_create(adapter);
4231                 if (status)
4232                         goto err;
4233
4234                 status = be_vf_eth_addr_config(adapter);
4235                 if (status)
4236                         goto err;
4237         }
4238
4239         for_all_vfs(adapter, vf_cfg, vf) {
4240                 /* Allow VFs to program MAC/VLAN filters */
4241                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4242                                                   vf + 1);
4243                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4244                         status = be_cmd_set_fn_privileges(adapter,
4245                                                           vf_cfg->privileges |
4246                                                           BE_PRIV_FILTMGMT,
4247                                                           vf + 1);
4248                         if (!status) {
4249                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4250                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4251                                          vf);
4252                         }
4253                 }
4254
4255                 /* Allow full available bandwidth */
4256                 if (!old_vfs)
4257                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4258
4259                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4260                                                vf_cfg->if_handle, NULL,
4261                                                &spoofchk);
4262                 if (!status)
4263                         vf_cfg->spoofchk = spoofchk;
4264
4265                 if (!old_vfs) {
4266                         be_cmd_enable_vf(adapter, vf + 1);
4267                         be_cmd_set_logical_link_config(adapter,
4268                                                        IFLA_VF_LINK_STATE_AUTO,
4269                                                        vf+1);
4270                 }
4271         }
4272
4273         if (!old_vfs) {
4274                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4275                 if (status) {
4276                         dev_err(dev, "SRIOV enable failed\n");
4277                         adapter->num_vfs = 0;
4278                         goto err;
4279                 }
4280         }
4281
4282         if (BE3_chip(adapter)) {
4283                 /* On BE3, enable VEB only when SRIOV is enabled */
4284                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4285                                                adapter->if_handle,
4286                                                PORT_FWD_TYPE_VEB, 0);
4287                 if (status)
4288                         goto err;
4289         }
4290
4291         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4292         return 0;
4293 err:
4294         dev_err(dev, "VF setup failed\n");
4295         be_vf_clear(adapter);
4296         return status;
4297 }
4298
4299 /* Converting function_mode bits on BE3 to SH mc_type enums */
4300
4301 static u8 be_convert_mc_type(u32 function_mode)
4302 {
4303         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4304                 return vNIC1;
4305         else if (function_mode & QNQ_MODE)
4306                 return FLEX10;
4307         else if (function_mode & VNIC_MODE)
4308                 return vNIC2;
4309         else if (function_mode & UMC_ENABLED)
4310                 return UMC;
4311         else
4312                 return MC_NONE;
4313 }
4314
4315 /* On BE2/BE3 FW does not suggest the supported limits */
4316 static void BEx_get_resources(struct be_adapter *adapter,
4317                               struct be_resources *res)
4318 {
4319         bool use_sriov = adapter->num_vfs ? 1 : 0;
4320
4321         if (be_physfn(adapter))
4322                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4323         else
4324                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4325
4326         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4327
4328         if (be_is_mc(adapter)) {
4329                 /* Assuming that there are 4 channels per port,
4330                  * when multi-channel is enabled
4331                  */
4332                 if (be_is_qnq_mode(adapter))
4333                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4334                 else
4335                         /* In a non-qnq multichannel mode, the pvid
4336                          * takes up one vlan entry
4337                          */
4338                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4339         } else {
4340                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4341         }
4342
4343         res->max_mcast_mac = BE_MAX_MC;
4344
4345         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4346          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4347          *    *only* if it is RSS-capable.
4348          */
4349         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4350             be_virtfn(adapter) ||
4351             (be_is_mc(adapter) &&
4352              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4353                 res->max_tx_qs = 1;
4354         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4355                 struct be_resources super_nic_res = {0};
4356
4357                 /* On a SuperNIC profile, the driver needs to use the
4358                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4359                  */
4360                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4361                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4362                                           0);
4363                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4364                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4365         } else {
4366                 res->max_tx_qs = BE3_MAX_TX_QS;
4367         }
4368
4369         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4370             !use_sriov && be_physfn(adapter))
4371                 res->max_rss_qs = (adapter->be3_native) ?
4372                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4373         res->max_rx_qs = res->max_rss_qs + 1;
4374
4375         if (be_physfn(adapter))
4376                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4377                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4378         else
4379                 res->max_evt_qs = 1;
4380
4381         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4382         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4383         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4384                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4385 }
4386
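/* Reset adapter soft-state to sane defaults before the function is
 * (re)configured in be_setup().
 */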
4387 static void be_setup_init(struct be_adapter *adapter)
4388 {
4389         adapter->vlan_prio_bmap = 0xff;
4390         adapter->phy.link_speed = -1;
4391         adapter->if_handle = -1;
4392         adapter->be3_native = false;
4393         adapter->if_flags = 0;
4394         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4395         if (be_physfn(adapter))
4396                 adapter->cmd_privileges = MAX_PRIVILEGES;
4397         else
4398                 adapter->cmd_privileges = MIN_PRIVILEGES;
4399 }
4400
4401 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4402  * However, this HW limitation is not exposed to the host via any SLI cmd.
4403  * As a result, in the case of SRIOV and in particular multi-partition configs,
4404  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4405  * for distribution between the VFs. This self-imposed limit will determine the
4406  * number of VFs for which RSS can be enabled.
4407  */
4408 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4409 {
4410         struct be_port_resources port_res = {0};
4411         u8 rss_tables_on_port;
4412         u16 max_vfs = be_max_vfs(adapter);
4413
4414         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4415                                   RESOURCE_LIMITS, 0);
4416
4417         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4418
4419         /* Each PF Pool's RSS Tables limit =
4420          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4421          */
4422         adapter->pool_res.max_rss_tables =
4423                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4424 }
4425
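/* Read the PF-pool resource limits and max VFs from FW; older BE3 FW may not
 * report max_vfs, in which case fall back to the PCI SR-IOV TotalVFs value.
 */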
4426 static int be_get_sriov_config(struct be_adapter *adapter)
4427 {
4428         struct be_resources res = {0};
4429         int max_vfs, old_vfs;
4430
4431         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4432                                   RESOURCE_LIMITS, 0);
4433
4434         /* Some old versions of BE3 FW don't report max_vfs value */
4435         if (BE3_chip(adapter) && !res.max_vfs) {
4436                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4437                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4438         }
4439
4440         adapter->pool_res = res;
4441
4442         /* If during previous unload of the driver, the VFs were not disabled,
4443          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4444          * Instead use the TotalVFs value stored in the pci-dev struct.
4445          */
4446         old_vfs = pci_num_vf(adapter->pdev);
4447         if (old_vfs) {
4448                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4449                          old_vfs);
4450
4451                 adapter->pool_res.max_vfs =
4452                         pci_sriov_get_totalvfs(adapter->pdev);
4453                 adapter->num_vfs = old_vfs;
4454         }
4455
4456         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4457                 be_calculate_pf_pool_rss_tables(adapter);
4458                 dev_info(&adapter->pdev->dev,
4459                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4460                          be_max_pf_pool_rss_tables(adapter));
4461         }
4462         return 0;
4463 }
4464
4465 static void be_alloc_sriov_res(struct be_adapter *adapter)
4466 {
4467         int old_vfs = pci_num_vf(adapter->pdev);
4468         struct be_resources vft_res = {0};
4469         int status;
4470
4471         be_get_sriov_config(adapter);
4472
4473         if (!old_vfs)
4474                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4475
4476         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4477          * resources are given to the PF during driver load, if there are no
4478          * old VFs. This facility is not available in BE3 FW.
4479          * Also, this is done by FW in Lancer chip.
4480          */
4481         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4482                 be_calculate_vf_res(adapter, 0, &vft_res);
4483                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4484                                                  &vft_res);
4485                 if (status)
4486                         dev_err(&adapter->pdev->dev,
4487                                 "Failed to optimize SRIOV resources\n");
4488         }
4489 }
4490
4491 static int be_get_resources(struct be_adapter *adapter)
4492 {
4493         struct device *dev = &adapter->pdev->dev;
4494         struct be_resources res = {0};
4495         int status;
4496
4497         /* For Lancer, SH etc. read per-function resource limits from FW.
4498          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4499          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4500          */
4501         if (BEx_chip(adapter)) {
4502                 BEx_get_resources(adapter, &res);
4503         } else {
4504                 status = be_cmd_get_func_config(adapter, &res);
4505                 if (status)
4506                         return status;
4507
4508                 /* If a default RXQ must be created, we'll use up one RSS queue */
4509                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4510                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4511                         res.max_rss_qs -= 1;
4512         }
4513
4514         /* If RoCE is supported, stash away half the EQs for RoCE */
4515         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4516                                 res.max_evt_qs / 2 : res.max_evt_qs;
4517         adapter->res = res;
4518
4519         /* If FW supports RSS default queue, then skip creating non-RSS
4520          * queue for non-IP traffic.
4521          */
4522         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4523                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4524
4525         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4526                  be_max_txqs(adapter), be_max_rxqs(adapter),
4527                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4528                  be_max_vfs(adapter));
4529         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4530                  be_max_uc(adapter), be_max_mc(adapter),
4531                  be_max_vlans(adapter));
4532
4533         /* Ensure RX and TX queues are created in pairs at init time */
4534         adapter->cfg_num_rx_irqs =
4535                                 min_t(u16, netif_get_num_default_rss_queues(),
4536                                       be_max_qp_irqs(adapter));
4537         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4538         return 0;
4539 }
4540
4541 static int be_get_config(struct be_adapter *adapter)
4542 {
4543         int status, level;
4544         u16 profile_id;
4545
4546         status = be_cmd_get_cntl_attributes(adapter);
4547         if (status)
4548                 return status;
4549
4550         status = be_cmd_query_fw_cfg(adapter);
4551         if (status)
4552                 return status;
4553
4554         if (!lancer_chip(adapter) && be_physfn(adapter))
4555                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4556
4557         if (BEx_chip(adapter)) {
4558                 level = be_cmd_get_fw_log_level(adapter);
4559                 adapter->msg_enable =
4560                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4561         }
4562
4563         be_cmd_get_acpi_wol_cap(adapter);
4564         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4565         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4566
4567         be_cmd_query_port_name(adapter);
4568
4569         if (be_physfn(adapter)) {
4570                 status = be_cmd_get_active_profile(adapter, &profile_id);
4571                 if (!status)
4572                         dev_info(&adapter->pdev->dev,
4573                                  "Using profile 0x%x\n", profile_id);
4574         }
4575
4576         return 0;
4577 }
4578
4579 static int be_mac_setup(struct be_adapter *adapter)
4580 {
4581         u8 mac[ETH_ALEN];
4582         int status;
4583
4584         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4585                 status = be_cmd_get_perm_mac(adapter, mac);
4586                 if (status)
4587                         return status;
4588
4589                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4590                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4591
4592                 /* Initial MAC for BE3 VFs is already programmed by PF */
4593                 if (BEx_chip(adapter) && be_virtfn(adapter))
4594                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4595         }
4596
4597         return 0;
4598 }
4599
4600 static void be_schedule_worker(struct be_adapter *adapter)
4601 {
4602         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4603         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4604 }
4605
4606 static void be_destroy_err_recovery_workq(void)
4607 {
4608         if (!be_err_recovery_workq)
4609                 return;
4610
4611         flush_workqueue(be_err_recovery_workq);
4612         destroy_workqueue(be_err_recovery_workq);
4613         be_err_recovery_workq = NULL;
4614 }
4615
4616 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4617 {
4618         struct be_error_recovery *err_rec = &adapter->error_recovery;
4619
4620         if (!be_err_recovery_workq)
4621                 return;
4622
4623         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4624                            msecs_to_jiffies(delay));
4625         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4626 }
4627
4628 static int be_setup_queues(struct be_adapter *adapter)
4629 {
4630         struct net_device *netdev = adapter->netdev;
4631         int status;
4632
4633         status = be_evt_queues_create(adapter);
4634         if (status)
4635                 goto err;
4636
4637         status = be_tx_qs_create(adapter);
4638         if (status)
4639                 goto err;
4640
4641         status = be_rx_cqs_create(adapter);
4642         if (status)
4643                 goto err;
4644
4645         status = be_mcc_queues_create(adapter);
4646         if (status)
4647                 goto err;
4648
4649         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4650         if (status)
4651                 goto err;
4652
4653         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4654         if (status)
4655                 goto err;
4656
4657         return 0;
4658 err:
4659         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4660         return status;
4661 }
4662
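/* Allocate the unicast/multicast MAC filter tables and create the NIC
 * interface (if_handle). RSS capability flags are dropped when only one
 * RX IRQ is configured.
 */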
4663 static int be_if_create(struct be_adapter *adapter)
4664 {
4665         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4666         u32 cap_flags = be_if_cap_flags(adapter);
4667         int status;
4668
4669         /* alloc required memory for other filtering fields */
4670         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4671                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4672         if (!adapter->pmac_id)
4673                 return -ENOMEM;
4674
4675         adapter->mc_list = kcalloc(be_max_mc(adapter),
4676                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4677         if (!adapter->mc_list)
4678                 return -ENOMEM;
4679
4680         adapter->uc_list = kcalloc(be_max_uc(adapter),
4681                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4682         if (!adapter->uc_list)
4683                 return -ENOMEM;
4684
4685         if (adapter->cfg_num_rx_irqs == 1)
4686                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4687
4688         en_flags &= cap_flags;
4689         /* will enable all the needed filter flags in be_open() */
4690         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4691                                   &adapter->if_handle, 0);
4692
4693         if (status)
4694                 return status;
4695
4696         return 0;
4697 }
4698
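/* Destroy and re-create the interface and all queues with the currently
 * configured queue/IRQ counts, e.g. when the channel configuration is
 * changed at runtime.
 */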
4699 int be_update_queues(struct be_adapter *adapter)
4700 {
4701         struct net_device *netdev = adapter->netdev;
4702         int status;
4703
4704         if (netif_running(netdev)) {
4705                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4706                 netif_carrier_off(netdev);
4707
4708                 be_close(netdev);
4709         }
4710
4711         be_cancel_worker(adapter);
4712
4713         /* If any vectors have been shared with RoCE we cannot re-program
4714          * the MSIx table.
4715          */
4716         if (!adapter->num_msix_roce_vec)
4717                 be_msix_disable(adapter);
4718
4719         be_clear_queues(adapter);
4720         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4721         if (status)
4722                 return status;
4723
4724         if (!msix_enabled(adapter)) {
4725                 status = be_msix_enable(adapter);
4726                 if (status)
4727                         return status;
4728         }
4729
4730         status = be_if_create(adapter);
4731         if (status)
4732                 return status;
4733
4734         status = be_setup_queues(adapter);
4735         if (status)
4736                 return status;
4737
4738         be_schedule_worker(adapter);
4739
4740         /*
4741          * The IF was destroyed and re-created. We need to clear
4742          * all promiscuous flags valid for the destroyed IF.
4743          * Without this promisc mode is not restored during
4744          * be_open() because the driver thinks that it is
4745          * already enabled in HW.
4746          */
4747         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4748
4749         if (netif_running(netdev))
4750                 status = be_open(netdev);
4751
4752         return status;
4753 }
4754
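/* Extract the major number from a FW version string (e.g. "4.6.x.y");
 * returns 0 if the string cannot be parsed.
 */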
4755 static inline int fw_major_num(const char *fw_ver)
4756 {
4757         int fw_major = 0, i;
4758
4759         i = sscanf(fw_ver, "%d.", &fw_major);
4760         if (i != 1)
4761                 return 0;
4762
4763         return fw_major;
4764 }
4765
4766 /* If it is error recovery, FLR the PF.
4767  * Else, if any VFs are already enabled, don't FLR the PF.
4768  */
4769 static bool be_reset_required(struct be_adapter *adapter)
4770 {
4771         if (be_error_recovering(adapter))
4772                 return true;
4773         else
4774                 return pci_num_vf(adapter->pdev) == 0;
4775 }
4776
4777 /* Wait for the FW to be ready and perform the required initialization */
4778 static int be_func_init(struct be_adapter *adapter)
4779 {
4780         int status;
4781
4782         status = be_fw_wait_ready(adapter);
4783         if (status)
4784                 return status;
4785
4786         /* FW is now ready; clear errors to allow cmds/doorbell */
4787         be_clear_error(adapter, BE_CLEAR_ALL);
4788
4789         if (be_reset_required(adapter)) {
4790                 status = be_cmd_reset_function(adapter);
4791                 if (status)
4792                         return status;
4793
4794                 /* Wait for interrupts to quiesce after an FLR */
4795                 msleep(100);
4796         }
4797
4798         /* Tell FW we're ready to fire cmds */
4799         status = be_cmd_fw_init(adapter);
4800         if (status)
4801                 return status;
4802
4803         /* Allow interrupts for other ULPs running on NIC function */
4804         be_intr_set(adapter, true);
4805
4806         return 0;
4807 }
4808
4809 static int be_setup(struct be_adapter *adapter)
4810 {
4811         struct device *dev = &adapter->pdev->dev;
4812         int status;
4813
4814         status = be_func_init(adapter);
4815         if (status)
4816                 return status;
4817
4818         be_setup_init(adapter);
4819
4820         if (!lancer_chip(adapter))
4821                 be_cmd_req_native_mode(adapter);
4822
4823         /* invoke this cmd first to get pf_num and vf_num, which are needed
4824          * for issuing profile-related cmds
4825          */
4826         if (!BEx_chip(adapter)) {
4827                 status = be_cmd_get_func_config(adapter, NULL);
4828                 if (status)
4829                         return status;
4830         }
4831
4832         status = be_get_config(adapter);
4833         if (status)
4834                 goto err;
4835
4836         if (!BE2_chip(adapter) && be_physfn(adapter))
4837                 be_alloc_sriov_res(adapter);
4838
4839         status = be_get_resources(adapter);
4840         if (status)
4841                 goto err;
4842
4843         status = be_msix_enable(adapter);
4844         if (status)
4845                 goto err;
4846
4847         /* will enable all the needed filter flags in be_open() */
4848         status = be_if_create(adapter);
4849         if (status)
4850                 goto err;
4851
4852         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4853         rtnl_lock();
4854         status = be_setup_queues(adapter);
4855         rtnl_unlock();
4856         if (status)
4857                 goto err;
4858
4859         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4860
4861         status = be_mac_setup(adapter);
4862         if (status)
4863                 goto err;
4864
4865         be_cmd_get_fw_ver(adapter);
4866         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4867
4868         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4869                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4870                         adapter->fw_ver);
4871                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4872         }
4873
4874         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4875                                          adapter->rx_fc);
4876         if (status)
4877                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4878                                         &adapter->rx_fc);
4879
4880         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4881                  adapter->tx_fc, adapter->rx_fc);
4882
4883         if (be_physfn(adapter))
4884                 be_cmd_set_logical_link_config(adapter,
4885                                                IFLA_VF_LINK_STATE_AUTO, 0);
4886
4887         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4888          * confusing a Linux bridge or OVS that it might be connected to.
4889          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4890          * when SRIOV is not enabled.
4891          */
4892         if (BE3_chip(adapter))
4893                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4894                                       PORT_FWD_TYPE_PASSTHRU, 0);
4895
4896         if (adapter->num_vfs)
4897                 be_vf_setup(adapter);
4898
4899         status = be_cmd_get_phy_info(adapter);
4900         if (!status && be_pause_supported(adapter))
4901                 adapter->phy.fc_autoneg = 1;
4902
4903         if (be_physfn(adapter) && !lancer_chip(adapter))
4904                 be_cmd_set_features(adapter);
4905
4906         be_schedule_worker(adapter);
4907         adapter->flags |= BE_FLAGS_SETUP_DONE;
4908         return 0;
4909 err:
4910         be_clear(adapter);
4911         return status;
4912 }
4913
4914 #ifdef CONFIG_NET_POLL_CONTROLLER
4915 static void be_netpoll(struct net_device *netdev)
4916 {
4917         struct be_adapter *adapter = netdev_priv(netdev);
4918         struct be_eq_obj *eqo;
4919         int i;
4920
4921         for_all_evt_queues(adapter, eqo, i) {
4922                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4923                 napi_schedule(&eqo->napi);
4924         }
4925 }
4926 #endif
4927
4928 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4929 {
4930         const struct firmware *fw;
4931         int status;
4932
4933         if (!netif_running(adapter->netdev)) {
4934                 dev_err(&adapter->pdev->dev,
4935                         "Firmware load not allowed (interface is down)\n");
4936                 return -ENETDOWN;
4937         }
4938
4939         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4940         if (status)
4941                 goto fw_exit;
4942
4943         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4944
4945         if (lancer_chip(adapter))
4946                 status = lancer_fw_download(adapter, fw);
4947         else
4948                 status = be_fw_download(adapter, fw);
4949
4950         if (!status)
4951                 be_cmd_get_fw_ver(adapter);
4952
4953 fw_exit:
4954         release_firmware(fw);
4955         return status;
4956 }
4957
4958 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4959                                  u16 flags)
4960 {
4961         struct be_adapter *adapter = netdev_priv(dev);
4962         struct nlattr *attr, *br_spec;
4963         int rem;
4964         int status = 0;
4965         u16 mode = 0;
4966
4967         if (!sriov_enabled(adapter))
4968                 return -EOPNOTSUPP;
4969
4970         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4971         if (!br_spec)
4972                 return -EINVAL;
4973
4974         nla_for_each_nested(attr, br_spec, rem) {
4975                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4976                         continue;
4977
4978                 if (nla_len(attr) < sizeof(mode))
4979                         return -EINVAL;
4980
4981                 mode = nla_get_u16(attr);
4982                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4983                         return -EOPNOTSUPP;
4984
4985                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4986                         return -EINVAL;
4987
4988                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4989                                                adapter->if_handle,
4990                                                mode == BRIDGE_MODE_VEPA ?
4991                                                PORT_FWD_TYPE_VEPA :
4992                                                PORT_FWD_TYPE_VEB, 0);
4993                 if (status)
4994                         goto err;
4995
4996                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4997                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4998
4999                 return status;
5000         }
5001 err:
5002         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5003                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5004
5005         return status;
5006 }
5007
5008 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5009                                  struct net_device *dev, u32 filter_mask,
5010                                  int nlflags)
5011 {
5012         struct be_adapter *adapter = netdev_priv(dev);
5013         int status = 0;
5014         u8 hsw_mode;
5015
5016         /* BE and Lancer chips support VEB mode only */
5017         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5018                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5019                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5020                         return 0;
5021                 hsw_mode = PORT_FWD_TYPE_VEB;
5022         } else {
5023                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5024                                                adapter->if_handle, &hsw_mode,
5025                                                NULL);
5026                 if (status)
5027                         return 0;
5028
5029                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5030                         return 0;
5031         }
5032
5033         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5034                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5035                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5036                                        0, 0, nlflags, filter_mask, NULL);
5037 }
5038
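/* Allocate a work item for deferring a cmd to process context.
 * GFP_ATOMIC is used since some callers may run in atomic context.
 */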
5039 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5040                                          void (*func)(struct work_struct *))
5041 {
5042         struct be_cmd_work *work;
5043
5044         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5045         if (!work) {
5046                 dev_err(&adapter->pdev->dev,
5047                         "be_work memory allocation failed\n");
5048                 return NULL;
5049         }
5050
5051         INIT_WORK(&work->work, func);
5052         work->adapter = adapter;
5053         return work;
5054 }
5055
5056 /* VxLAN offload Notes:
5057  *
5058  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5059  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5060  * is expected to work across all types of IP tunnels once exported. Skyhawk
5061  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5062  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5063  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5064  * those other tunnels are unexported on the fly through ndo_features_check().
5065  *
5066  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5067  * adds more than one port, disable offloads and don't re-enable them again
5068  * until after all the tunnels are removed.
5069  */
5070 static void be_work_add_vxlan_port(struct work_struct *work)
5071 {
5072         struct be_cmd_work *cmd_work =
5073                                 container_of(work, struct be_cmd_work, work);
5074         struct be_adapter *adapter = cmd_work->adapter;
5075         struct net_device *netdev = adapter->netdev;
5076         struct device *dev = &adapter->pdev->dev;
5077         __be16 port = cmd_work->info.vxlan_port;
5078         int status;
5079
5080         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5081                 adapter->vxlan_port_aliases++;
5082                 goto done;
5083         }
5084
5085         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5086                 dev_info(dev,
5087                          "Only one UDP port supported for VxLAN offloads\n");
5088                 dev_info(dev, "Disabling VxLAN offloads\n");
5089                 adapter->vxlan_port_count++;
5090                 goto err;
5091         }
5092
5093         if (adapter->vxlan_port_count++ >= 1)
5094                 goto done;
5095
5096         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5097                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5098         if (status) {
5099                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5100                 goto err;
5101         }
5102
5103         status = be_cmd_set_vxlan_port(adapter, port);
5104         if (status) {
5105                 dev_warn(dev, "Failed to add VxLAN port\n");
5106                 goto err;
5107         }
5108         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5109         adapter->vxlan_port = port;
5110
5111         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5112                                    NETIF_F_TSO | NETIF_F_TSO6 |
5113                                    NETIF_F_GSO_UDP_TUNNEL;
5114         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5115         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5116
5117         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5118                  be16_to_cpu(port));
5119         goto done;
5120 err:
5121         be_disable_vxlan_offloads(adapter);
5122 done:
5123         kfree(cmd_work);
5124 }
5125
5126 static void be_work_del_vxlan_port(struct work_struct *work)
5127 {
5128         struct be_cmd_work *cmd_work =
5129                                 container_of(work, struct be_cmd_work, work);
5130         struct be_adapter *adapter = cmd_work->adapter;
5131         __be16 port = cmd_work->info.vxlan_port;
5132
5133         if (adapter->vxlan_port != port)
5134                 goto done;
5135
5136         if (adapter->vxlan_port_aliases) {
5137                 adapter->vxlan_port_aliases--;
5138                 goto out;
5139         }
5140
5141         be_disable_vxlan_offloads(adapter);
5142
5143         dev_info(&adapter->pdev->dev,
5144                  "Disabled VxLAN offloads for UDP port %d\n",
5145                  be16_to_cpu(port));
5146 done:
5147         adapter->vxlan_port_count--;
5148 out:
5149         kfree(cmd_work);
5150 }
5151
5152 static void be_cfg_vxlan_port(struct net_device *netdev,
5153                               struct udp_tunnel_info *ti,
5154                               void (*func)(struct work_struct *))
5155 {
5156         struct be_adapter *adapter = netdev_priv(netdev);
5157         struct be_cmd_work *cmd_work;
5158
5159         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5160                 return;
5161
5162         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5163                 return;
5164
5165         cmd_work = be_alloc_work(adapter, func);
5166         if (cmd_work) {
5167                 cmd_work->info.vxlan_port = ti->port;
5168                 queue_work(be_wq, &cmd_work->work);
5169         }
5170 }
5171
5172 static void be_del_vxlan_port(struct net_device *netdev,
5173                               struct udp_tunnel_info *ti)
5174 {
5175         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5176 }
5177
5178 static void be_add_vxlan_port(struct net_device *netdev,
5179                               struct udp_tunnel_info *ti)
5180 {
5181         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5182 }
5183
5184 static netdev_features_t be_features_check(struct sk_buff *skb,
5185                                            struct net_device *dev,
5186                                            netdev_features_t features)
5187 {
5188         struct be_adapter *adapter = netdev_priv(dev);
5189         u8 l4_hdr = 0;
5190
5191         /* The code below restricts offload features for some tunneled and
5192          * Q-in-Q packets.
5193          * Offload features for normal (non tunnel) packets are unchanged.
5194          */
5195         features = vlan_features_check(skb, features);
5196         if (!skb->encapsulation ||
5197             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5198                 return features;
5199
5200         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5201          * should disable tunnel offload features if it's not a VxLAN packet,
5202          * as tunnel offloads have been enabled only for VxLAN. This is done to
5203          * allow other tunneled traffic like GRE to work fine while VxLAN
5204          * offloads are configured in Skyhawk-R.
5205          */
5206         switch (vlan_get_protocol(skb)) {
5207         case htons(ETH_P_IP):
5208                 l4_hdr = ip_hdr(skb)->protocol;
5209                 break;
5210         case htons(ETH_P_IPV6):
5211                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5212                 break;
5213         default:
5214                 return features;
5215         }
5216
5217         if (l4_hdr != IPPROTO_UDP ||
5218             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5219             skb->inner_protocol != htons(ETH_P_TEB) ||
5220             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5221                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5222             !adapter->vxlan_port ||
5223             udp_hdr(skb)->dest != adapter->vxlan_port)
5224                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5225
5226         return features;
5227 }
5228
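/* Compose the physical port id from the HBA port number followed by the
 * controller serial number (serial words copied in reverse order).
 */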
5229 static int be_get_phys_port_id(struct net_device *dev,
5230                                struct netdev_phys_item_id *ppid)
5231 {
5232         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5233         struct be_adapter *adapter = netdev_priv(dev);
5234         u8 *id;
5235
5236         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5237                 return -ENOSPC;
5238
5239         ppid->id[0] = adapter->hba_port_num + 1;
5240         id = &ppid->id[1];
5241         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5242              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5243                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5244
5245         ppid->id_len = id_len;
5246
5247         return 0;
5248 }
5249
5250 static void be_set_rx_mode(struct net_device *dev)
5251 {
5252         struct be_adapter *adapter = netdev_priv(dev);
5253         struct be_cmd_work *work;
5254
5255         work = be_alloc_work(adapter, be_work_set_rx_mode);
5256         if (work)
5257                 queue_work(be_wq, &work->work);
5258 }
5259
5260 static const struct net_device_ops be_netdev_ops = {
5261         .ndo_open               = be_open,
5262         .ndo_stop               = be_close,
5263         .ndo_start_xmit         = be_xmit,
5264         .ndo_set_rx_mode        = be_set_rx_mode,
5265         .ndo_set_mac_address    = be_mac_addr_set,
5266         .ndo_change_mtu         = be_change_mtu,
5267         .ndo_get_stats64        = be_get_stats64,
5268         .ndo_validate_addr      = eth_validate_addr,
5269         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5270         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5271         .ndo_set_vf_mac         = be_set_vf_mac,
5272         .ndo_set_vf_vlan        = be_set_vf_vlan,
5273         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5274         .ndo_get_vf_config      = be_get_vf_config,
5275         .ndo_set_vf_link_state  = be_set_vf_link_state,
5276         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5277 #ifdef CONFIG_NET_POLL_CONTROLLER
5278         .ndo_poll_controller    = be_netpoll,
5279 #endif
5280         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5281         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5282 #ifdef CONFIG_NET_RX_BUSY_POLL
5283         .ndo_busy_poll          = be_busy_poll,
5284 #endif
5285         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5286         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5287         .ndo_features_check     = be_features_check,
5288         .ndo_get_phys_port_id   = be_get_phys_port_id,
5289 };
5290
5291 static void be_netdev_init(struct net_device *netdev)
5292 {
5293         struct be_adapter *adapter = netdev_priv(netdev);
5294
5295         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5296                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5297                 NETIF_F_HW_VLAN_CTAG_TX;
5298         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5299                 netdev->hw_features |= NETIF_F_RXHASH;
5300
5301         netdev->features |= netdev->hw_features |
5302                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5303
5304         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5305                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5306
5307         netdev->priv_flags |= IFF_UNICAST_FLT;
5308
5309         netdev->flags |= IFF_MULTICAST;
5310
5311         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5312
5313         netdev->netdev_ops = &be_netdev_ops;
5314
5315         netdev->ethtool_ops = &be_ethtool_ops;
5316 }
5317
5318 static void be_cleanup(struct be_adapter *adapter)
5319 {
5320         struct net_device *netdev = adapter->netdev;
5321
5322         rtnl_lock();
5323         netif_device_detach(netdev);
5324         if (netif_running(netdev))
5325                 be_close(netdev);
5326         rtnl_unlock();
5327
5328         be_clear(adapter);
5329 }
5330
5331 static int be_resume(struct be_adapter *adapter)
5332 {
5333         struct net_device *netdev = adapter->netdev;
5334         int status;
5335
5336         status = be_setup(adapter);
5337         if (status)
5338                 return status;
5339
5340         rtnl_lock();
5341         if (netif_running(netdev))
5342                 status = be_open(netdev);
5343         rtnl_unlock();
5344
5345         if (status)
5346                 return status;
5347
5348         netif_device_attach(netdev);
5349
5350         return 0;
5351 }
5352
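/* Initiate a chip-level soft reset by setting the SR bit in the
 * SLIPORT_SOFTRESET register.
 */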
5353 static void be_soft_reset(struct be_adapter *adapter)
5354 {
5355         u32 val;
5356
5357         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5358         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5359         val |= SLIPORT_SOFTRESET_SR_MASK;
5360         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5361 }
5362
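/* A UE is treated as recoverable only if the POST stage reports a
 * recoverable error code, enough time has passed since driver load and
 * since the last recovery attempt, and the error code is not a repeat of
 * the previous one.
 */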
5363 static bool be_err_is_recoverable(struct be_adapter *adapter)
5364 {
5365         struct be_error_recovery *err_rec = &adapter->error_recovery;
5366         unsigned long initial_idle_time =
5367                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5368         unsigned long recovery_interval =
5369                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5370         u16 ue_err_code;
5371         u32 val;
5372
5373         val = be_POST_stage_get(adapter);
5374         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5375                 return false;
5376         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5377         if (ue_err_code == 0)
5378                 return false;
5379
5380         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5381                 ue_err_code);
5382
5383         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5384                 dev_err(&adapter->pdev->dev,
5385                         "Cannot recover within %lu sec from driver load\n",
5386                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5387                 return false;
5388         }
5389
5390         if (err_rec->last_recovery_time &&
5391             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5392                 dev_err(&adapter->pdev->dev,
5393                         "Cannot recover within %lu sec from last recovery\n",
5394                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5395                 return false;
5396         }
5397
5398         if (ue_err_code == err_rec->last_err_code) {
5399                 dev_err(&adapter->pdev->dev,
5400                         "Cannot recover from a consecutive TPE error\n");
5401                 return false;
5402         }
5403
5404         err_rec->last_recovery_time = jiffies;
5405         err_rec->last_err_code = ue_err_code;
5406         return true;
5407 }
5408
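/* TPE error recovery state machine: detect the error, have PF0 issue a chip
 * soft reset once the recovery criteria are met, then wait before
 * re-initialization. resched_delay tells the caller when to run the next
 * state; a return value of 0 means the adapter is ready to be re-initialized.
 */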
5409 static int be_tpe_recover(struct be_adapter *adapter)
5410 {
5411         struct be_error_recovery *err_rec = &adapter->error_recovery;
5412         int status = -EAGAIN;
5413         u32 val;
5414
5415         switch (err_rec->recovery_state) {
5416         case ERR_RECOVERY_ST_NONE:
5417                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5418                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5419                 break;
5420
5421         case ERR_RECOVERY_ST_DETECT:
5422                 val = be_POST_stage_get(adapter);
5423                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5424                     POST_STAGE_RECOVERABLE_ERR) {
5425                         dev_err(&adapter->pdev->dev,
5426                                 "Unrecoverable HW error detected: 0x%x\n", val);
5427                         status = -EINVAL;
5428                         err_rec->resched_delay = 0;
5429                         break;
5430                 }
5431
5432                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5433
5434                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5435                  * milliseconds before it checks for final error status in
5436                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5437                  * If they are, PF0 then initiates a Soft Reset.
5438                  */
5439                 if (adapter->pf_num == 0) {
5440                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5441                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5442                                         ERR_RECOVERY_UE_DETECT_DURATION;
5443                         break;
5444                 }
5445
5446                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5447                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5448                                         ERR_RECOVERY_UE_DETECT_DURATION;
5449                 break;
5450
5451         case ERR_RECOVERY_ST_RESET:
5452                 if (!be_err_is_recoverable(adapter)) {
5453                         dev_err(&adapter->pdev->dev,
5454                                 "Failed to meet recovery criteria\n");
5455                         status = -EIO;
5456                         err_rec->resched_delay = 0;
5457                         break;
5458                 }
5459                 be_soft_reset(adapter);
5460                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5461                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5462                                         err_rec->ue_to_reset_time;
5463                 break;
5464
5465         case ERR_RECOVERY_ST_PRE_POLL:
5466                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5467                 err_rec->resched_delay = 0;
5468                 status = 0;                     /* done */
5469                 break;
5470
5471         default:
5472                 status = -EINVAL;
5473                 err_rec->resched_delay = 0;
5474                 break;
5475         }
5476
5477         return status;
5478 }
5479
5480 static int be_err_recover(struct be_adapter *adapter)
5481 {
5482         int status;
5483
5484         if (!lancer_chip(adapter)) {
5485                 if (!adapter->error_recovery.recovery_supported ||
5486                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5487                         return -EIO;
5488                 status = be_tpe_recover(adapter);
5489                 if (status)
5490                         goto err;
5491         }
5492
5493         /* Wait for adapter to reach quiescent state before
5494          * destroying queues
5495          */
5496         status = be_fw_wait_ready(adapter);
5497         if (status)
5498                 goto err;
5499
5500         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5501
5502         be_cleanup(adapter);
5503
5504         status = be_resume(adapter);
5505         if (status)
5506                 goto err;
5507
5508         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5509
5510 err:
5511         return status;
5512 }
5513
5514 static void be_err_detection_task(struct work_struct *work)
5515 {
5516         struct be_error_recovery *err_rec =
5517                         container_of(work, struct be_error_recovery,
5518                                      err_detection_work.work);
5519         struct be_adapter *adapter =
5520                         container_of(err_rec, struct be_adapter,
5521                                      error_recovery);
5522         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5523         struct device *dev = &adapter->pdev->dev;
5524         int recovery_status;
5525
5526         be_detect_error(adapter);
5527         if (!be_check_error(adapter, BE_ERROR_HW))
5528                 goto reschedule_task;
5529
5530         recovery_status = be_err_recover(adapter);
5531         if (!recovery_status) {
5532                 err_rec->recovery_retries = 0;
5533                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5534                 dev_info(dev, "Adapter recovery successful\n");
5535                 goto reschedule_task;
5536         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5537                 /* BEx/SH recovery state machine */
5538                 if (adapter->pf_num == 0 &&
5539                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5540                         dev_err(&adapter->pdev->dev,
5541                                 "Adapter recovery in progress\n");
5542                 resched_delay = err_rec->resched_delay;
5543                 goto reschedule_task;
5544         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5545                 /* For VFs, check every second whether the PF has
5546                  * allocated resources.
5547                  */
5548                 dev_err(dev, "Re-trying adapter recovery\n");
5549                 goto reschedule_task;
5550         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5551                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5552                 /* In case of another error during recovery, it takes 30 sec
5553                  * for adapter to come out of error. Retry error recovery after
5554                  * this time interval.
5555                  */
5556                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5557                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5558                 goto reschedule_task;
5559         } else {
5560                 dev_err(dev, "Adapter recovery failed\n");
5561                 dev_err(dev, "Please reboot server to recover\n");
5562         }
5563
5564         return;
5565
5566 reschedule_task:
5567         be_schedule_err_detection(adapter, resched_delay);
5568 }
5569
5570 static void be_log_sfp_info(struct be_adapter *adapter)
5571 {
5572         int status;
5573
5574         status = be_cmd_query_sfp_info(adapter);
5575         if (!status) {
5576                 dev_err(&adapter->pdev->dev,
5577                         "Port %c: %s Vendor: %s part no: %s\n",
5578                         adapter->port_name,
5579                         be_misconfig_evt_port_state[adapter->phy_state],
5580                         adapter->phy.vendor_name,
5581                         adapter->phy.vendor_pn);
5582         }
5583         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5584 }
5585
5586 static void be_worker(struct work_struct *work)
5587 {
5588         struct be_adapter *adapter =
5589                 container_of(work, struct be_adapter, work.work);
5590         struct be_rx_obj *rxo;
5591         int i;
5592
5593         if (be_physfn(adapter) &&
5594             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5595                 be_cmd_get_die_temperature(adapter);
5596
5597         /* when interrupts are not yet enabled, just reap any pending
5598          * mcc completions
5599          */
5600         if (!netif_running(adapter->netdev)) {
5601                 local_bh_disable();
5602                 be_process_mcc(adapter);
5603                 local_bh_enable();
5604                 goto reschedule;
5605         }
5606
5607         if (!adapter->stats_cmd_sent) {
5608                 if (lancer_chip(adapter))
5609                         lancer_cmd_get_pport_stats(adapter,
5610                                                    &adapter->stats_cmd);
5611                 else
5612                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5613         }
5614
5615         for_all_rx_queues(adapter, rxo, i) {
5616                 /* Replenish RX-queues starved due to memory
5617                  * allocation failures.
5618                  */
5619                 if (rxo->rx_post_starved)
5620                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5621         }
5622
5623         /* EQ-delay update for Skyhawk is done while notifying EQ */
5624         if (!skyhawk_chip(adapter))
5625                 be_eqd_update(adapter, false);
5626
5627         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5628                 be_log_sfp_info(adapter);
5629
5630 reschedule:
5631         adapter->work_counter++;
5632         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5633 }
5634
5635 static void be_unmap_pci_bars(struct be_adapter *adapter)
5636 {
5637         if (adapter->csr)
5638                 pci_iounmap(adapter->pdev, adapter->csr);
5639         if (adapter->db)
5640                 pci_iounmap(adapter->pdev, adapter->db);
5641         if (adapter->pcicfg && adapter->pcicfg_mapped)
5642                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5643 }
5644
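/* Doorbell BAR: BAR 0 on Lancer and on VFs, BAR 4 otherwise (BEx/Skyhawk PFs) */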
5645 static int db_bar(struct be_adapter *adapter)
5646 {
5647         if (lancer_chip(adapter) || be_virtfn(adapter))
5648                 return 0;
5649         else
5650                 return 4;
5651 }
5652
5653 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5654 {
5655         if (skyhawk_chip(adapter)) {
5656                 adapter->roce_db.size = 4096;
5657                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5658                                                               db_bar(adapter));
5659                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5660                                                                db_bar(adapter));
5661         }
5662         return 0;
5663 }
5664
5665 static int be_map_pci_bars(struct be_adapter *adapter)
5666 {
5667         struct pci_dev *pdev = adapter->pdev;
5668         u8 __iomem *addr;
5669         u32 sli_intf;
5670
5671         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5672         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5673                                 SLI_INTF_FAMILY_SHIFT;
5674         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5675
5676         if (BEx_chip(adapter) && be_physfn(adapter)) {
5677                 adapter->csr = pci_iomap(pdev, 2, 0);
5678                 if (!adapter->csr)
5679                         return -ENOMEM;
5680         }
5681
5682         addr = pci_iomap(pdev, db_bar(adapter), 0);
5683         if (!addr)
5684                 goto pci_map_err;
5685         adapter->db = addr;
5686
5687         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5688                 if (be_physfn(adapter)) {
5689                         /* PCICFG is the 2nd BAR in BE2 */
5690                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5691                         if (!addr)
5692                                 goto pci_map_err;
5693                         adapter->pcicfg = addr;
5694                         adapter->pcicfg_mapped = true;
5695                 } else {
5696                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5697                         adapter->pcicfg_mapped = false;
5698                 }
5699         }
5700
5701         be_roce_map_pci_bars(adapter);
5702         return 0;
5703
5704 pci_map_err:
5705         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5706         be_unmap_pci_bars(adapter);
5707         return -ENOMEM;
5708 }
5709
5710 static void be_drv_cleanup(struct be_adapter *adapter)
5711 {
5712         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5713         struct device *dev = &adapter->pdev->dev;
5714
5715         if (mem->va)
5716                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5717
5718         mem = &adapter->rx_filter;
5719         if (mem->va)
5720                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5721
5722         mem = &adapter->stats_cmd;
5723         if (mem->va)
5724                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5725 }
5726
5727 /* Allocate and initialize various fields in be_adapter struct */
5728 static int be_drv_init(struct be_adapter *adapter)
5729 {
5730         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5731         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5732         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5733         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5734         struct device *dev = &adapter->pdev->dev;
5735         int status = 0;
5736
5737         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5738         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5739                                                  &mbox_mem_alloc->dma,
5740                                                  GFP_KERNEL);
5741         if (!mbox_mem_alloc->va)
5742                 return -ENOMEM;
5743
5744         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5745         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5746         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5747
5748         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5749         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5750                                             &rx_filter->dma, GFP_KERNEL);
5751         if (!rx_filter->va) {
5752                 status = -ENOMEM;
5753                 goto free_mbox;
5754         }
5755
5756         if (lancer_chip(adapter))
5757                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5758         else if (BE2_chip(adapter))
5759                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5760         else if (BE3_chip(adapter))
5761                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5762         else
5763                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5764         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5765                                             &stats_cmd->dma, GFP_KERNEL);
5766         if (!stats_cmd->va) {
5767                 status = -ENOMEM;
5768                 goto free_rx_filter;
5769         }
5770
5771         mutex_init(&adapter->mbox_lock);
5772         mutex_init(&adapter->mcc_lock);
5773         mutex_init(&adapter->rx_filter_lock);
5774         spin_lock_init(&adapter->mcc_cq_lock);
5775         init_completion(&adapter->et_cmd_compl);
5776
5777         pci_save_state(adapter->pdev);
5778
5779         INIT_DELAYED_WORK(&adapter->work, be_worker);
5780
5781         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5782         adapter->error_recovery.resched_delay = 0;
5783         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5784                           be_err_detection_task);
5785
5786         adapter->rx_fc = true;
5787         adapter->tx_fc = true;
5788
5789         /* Must be a power of 2 or else MODULO will BUG_ON */
5790         adapter->be_get_temp_freq = 64;
5791
5792         return 0;
5793
5794 free_rx_filter:
5795         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5796 free_mbox:
5797         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5798                           mbox_mem_alloc->dma);
5799         return status;
5800 }
5801
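/* PCI remove callback: tear down in roughly the reverse order of be_probe() -
 * RoCE, interrupts, error-detection work, netdev, queues, FW handshake,
 * BAR mappings, DMA buffers and finally the PCI device itself.
 */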
5802 static void be_remove(struct pci_dev *pdev)
5803 {
5804         struct be_adapter *adapter = pci_get_drvdata(pdev);
5805
5806         if (!adapter)
5807                 return;
5808
5809         be_roce_dev_remove(adapter);
5810         be_intr_set(adapter, false);
5811
5812         be_cancel_err_detection(adapter);
5813
5814         unregister_netdev(adapter->netdev);
5815
5816         be_clear(adapter);
5817
5818         if (!pci_vfs_assigned(adapter->pdev))
5819                 be_cmd_reset_function(adapter);
5820
5821         /* Tell FW we're done with issuing cmds */
5822         be_cmd_fw_clean(adapter);
5823
5824         be_unmap_pci_bars(adapter);
5825         be_drv_cleanup(adapter);
5826
5827         pci_disable_pcie_error_reporting(pdev);
5828
5829         pci_release_regions(pdev);
5830         pci_disable_device(pdev);
5831
5832         free_netdev(adapter->netdev);
5833 }
5834
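/* hwmon "temp1_input" show callback: report the on-die temperature in
 * millidegrees Celsius, or -EIO if no valid reading is available.
 */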
5835 static ssize_t be_hwmon_show_temp(struct device *dev,
5836                                   struct device_attribute *dev_attr,
5837                                   char *buf)
5838 {
5839         struct be_adapter *adapter = dev_get_drvdata(dev);
5840
5841         /* Unit: millidegree Celsius */
5842         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5843                 return -EIO;
5844         else
5845                 return sprintf(buf, "%u\n",
5846                                adapter->hwmon_info.be_on_die_temp * 1000);
5847 }
5848
5849 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5850                           be_hwmon_show_temp, NULL, 1);
5851
5852 static struct attribute *be_hwmon_attrs[] = {
5853         &sensor_dev_attr_temp1_input.dev_attr.attr,
5854         NULL
5855 };
5856
5857 ATTRIBUTE_GROUPS(be_hwmon);
5858
5859 static char *mc_name(struct be_adapter *adapter)
5860 {
5861         char *str = ""; /* default */
5862
5863         switch (adapter->mc_type) {
5864         case UMC:
5865                 str = "UMC";
5866                 break;
5867         case FLEX10:
5868                 str = "FLEX10";
5869                 break;
5870         case vNIC1:
5871                 str = "vNIC-1";
5872                 break;
5873         case nPAR:
5874                 str = "nPAR";
5875                 break;
5876         case UFP:
5877                 str = "UFP";
5878                 break;
5879         case vNIC2:
5880                 str = "vNIC-2";
5881                 break;
5882         default:
5883                 str = "";
5884         }
5885
5886         return str;
5887 }
5888
5889 static inline char *func_name(struct be_adapter *adapter)
5890 {
5891         return be_physfn(adapter) ? "PF" : "VF";
5892 }
5893
5894 static inline char *nic_name(struct pci_dev *pdev)
5895 {
5896         switch (pdev->device) {
5897         case OC_DEVICE_ID1:
5898                 return OC_NAME;
5899         case OC_DEVICE_ID2:
5900                 return OC_NAME_BE;
5901         case OC_DEVICE_ID3:
5902         case OC_DEVICE_ID4:
5903                 return OC_NAME_LANCER;
5904         case BE_DEVICE_ID2:
5905                 return BE3_NAME;
5906         case OC_DEVICE_ID5:
5907         case OC_DEVICE_ID6:
5908                 return OC_NAME_SH;
5909         default:
5910                 return BE_NAME;
5911         }
5912 }
5913
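/* PCI probe callback: enable the device, set up DMA masks, map BARs,
 * allocate driver resources, create queues, register the netdev and
 * kick off error detection.
 */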
5914 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5915 {
5916         struct be_adapter *adapter;
5917         struct net_device *netdev;
5918         int status = 0;
5919
5920         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5921
5922         status = pci_enable_device(pdev);
5923         if (status)
5924                 goto do_none;
5925
5926         status = pci_request_regions(pdev, DRV_NAME);
5927         if (status)
5928                 goto disable_dev;
5929         pci_set_master(pdev);
5930
5931         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5932         if (!netdev) {
5933                 status = -ENOMEM;
5934                 goto rel_reg;
5935         }
5936         adapter = netdev_priv(netdev);
5937         adapter->pdev = pdev;
5938         pci_set_drvdata(pdev, adapter);
5939         adapter->netdev = netdev;
5940         SET_NETDEV_DEV(netdev, &pdev->dev);
5941
5942         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5943         if (!status) {
5944                 netdev->features |= NETIF_F_HIGHDMA;
5945         } else {
5946                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5947                 if (status) {
5948                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5949                         goto free_netdev;
5950                 }
5951         }
5952
5953         status = pci_enable_pcie_error_reporting(pdev);
5954         if (!status)
5955                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5956
5957         status = be_map_pci_bars(adapter);
5958         if (status)
5959                 goto free_netdev;
5960
5961         status = be_drv_init(adapter);
5962         if (status)
5963                 goto unmap_bars;
5964
5965         status = be_setup(adapter);
5966         if (status)
5967                 goto drv_cleanup;
5968
5969         be_netdev_init(netdev);
5970         status = register_netdev(netdev);
5971         if (status != 0)
5972                 goto unsetup;
5973
5974         be_roce_dev_add(adapter);
5975
5976         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5977         adapter->error_recovery.probe_time = jiffies;
5978
5979         /* On-die temperature is not supported on VFs. */
5980         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5981                 adapter->hwmon_info.hwmon_dev =
5982                         devm_hwmon_device_register_with_groups(&pdev->dev,
5983                                                                DRV_NAME,
5984                                                                adapter,
5985                                                                be_hwmon_groups);
5986                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5987         }
5988
5989         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5990                  func_name(adapter), mc_name(adapter), adapter->port_name);
5991
5992         return 0;
5993
5994 unsetup:
5995         be_clear(adapter);
5996 drv_cleanup:
5997         be_drv_cleanup(adapter);
5998 unmap_bars:
5999         be_unmap_pci_bars(adapter);
6000 free_netdev:
6001         pci_disable_pcie_error_reporting(pdev);
6002         free_netdev(netdev);
6003 rel_reg:
6004         pci_release_regions(pdev);
6005 disable_dev:
6006         pci_disable_device(pdev);
6007 do_none:
6008         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6009         return status;
6010 }
6011
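/* Legacy PCI power-management suspend callback: quiesce the adapter and
 * put the device into the requested low-power state.
 */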
6012 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6013 {
6014         struct be_adapter *adapter = pci_get_drvdata(pdev);
6015
6016         be_intr_set(adapter, false);
6017         be_cancel_err_detection(adapter);
6018
6019         be_cleanup(adapter);
6020
6021         pci_save_state(pdev);
6022         pci_disable_device(pdev);
6023         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6024         return 0;
6025 }
6026
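/* Legacy PCI power-management resume callback: re-enable the device,
 * restore config space and bring the adapter back up.
 */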
6027 static int be_pci_resume(struct pci_dev *pdev)
6028 {
6029         struct be_adapter *adapter = pci_get_drvdata(pdev);
6030         int status = 0;
6031
6032         status = pci_enable_device(pdev);
6033         if (status)
6034                 return status;
6035
6036         pci_restore_state(pdev);
6037
6038         status = be_resume(adapter);
6039         if (status)
6040                 return status;
6041
6042         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6043
6044         return 0;
6045 }
6046
6047 /*
6048  * An FLR (Function Level Reset) will stop the device from DMAing any data.
6049  */
6050 static void be_shutdown(struct pci_dev *pdev)
6051 {
6052         struct be_adapter *adapter = pci_get_drvdata(pdev);
6053
6054         if (!adapter)
6055                 return;
6056
6057         be_roce_dev_shutdown(adapter);
6058         cancel_delayed_work_sync(&adapter->work);
6059         be_cancel_err_detection(adapter);
6060
6061         netif_device_detach(adapter->netdev);
6062
6063         be_cmd_reset_function(adapter);
6064
6065         pci_disable_device(pdev);
6066 }
6067
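/* EEH/AER error_detected callback: quiesce the adapter and tell the PCI
 * core whether a slot reset is needed or the device must be disconnected.
 */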
6068 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6069                                             pci_channel_state_t state)
6070 {
6071         struct be_adapter *adapter = pci_get_drvdata(pdev);
6072
6073         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6074
6075         be_roce_dev_remove(adapter);
6076
6077         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6078                 be_set_error(adapter, BE_ERROR_EEH);
6079
6080                 be_cancel_err_detection(adapter);
6081
6082                 be_cleanup(adapter);
6083         }
6084
6085         if (state == pci_channel_io_perm_failure)
6086                 return PCI_ERS_RESULT_DISCONNECT;
6087
6088         pci_disable_device(pdev);
6089
6090         /* The error could cause the FW to trigger a flash debug dump.
6091          * Resetting the card while a flash dump is in progress can
6092          * prevent it from recovering; wait for the dump to finish.
6093          * Only the first function needs to wait, since the dump happens
6094          * once per adapter.
6095          */
6096         if (pdev->devfn == 0)
6097                 ssleep(30);
6098
6099         return PCI_ERS_RESULT_NEED_RESET;
6100 }
6101
6102 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6103 {
6104         struct be_adapter *adapter = pci_get_drvdata(pdev);
6105         int status;
6106
6107         dev_info(&adapter->pdev->dev, "EEH reset\n");
6108
6109         status = pci_enable_device(pdev);
6110         if (status)
6111                 return PCI_ERS_RESULT_DISCONNECT;
6112
6113         pci_set_master(pdev);
6114         pci_restore_state(pdev);
6115
6116         /* Check if card is ok and fw is ready */
6117         dev_info(&adapter->pdev->dev,
6118                  "Waiting for FW to be ready after EEH reset\n");
6119         status = be_fw_wait_ready(adapter);
6120         if (status)
6121                 return PCI_ERS_RESULT_DISCONNECT;
6122
6123         pci_cleanup_aer_uncorrect_error_status(pdev);
6124         be_clear_error(adapter, BE_CLEAR_ALL);
6125         return PCI_ERS_RESULT_RECOVERED;
6126 }
6127
6128 static void be_eeh_resume(struct pci_dev *pdev)
6129 {
6130         int status = 0;
6131         struct be_adapter *adapter = pci_get_drvdata(pdev);
6132
6133         dev_info(&adapter->pdev->dev, "EEH resume\n");
6134
6135         pci_save_state(pdev);
6136
6137         status = be_resume(adapter);
6138         if (status)
6139                 goto err;
6140
6141         be_roce_dev_add(adapter);
6142
6143         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6144         return;
6145 err:
6146         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6147 }
6148
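/* sriov_configure callback, invoked when the user writes to the
 * sriov_numvfs sysfs attribute: redistribute PF-pool resources (Skyhawk),
 * resize the queues and enable or disable the requested number of VFs.
 */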
6149 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6150 {
6151         struct be_adapter *adapter = pci_get_drvdata(pdev);
6152         struct be_resources vft_res = {0};
6153         int status;
6154
6155         if (!num_vfs)
6156                 be_vf_clear(adapter);
6157
6158         adapter->num_vfs = num_vfs;
6159
6160         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6161                 dev_warn(&pdev->dev,
6162                          "Cannot disable VFs while they are assigned\n");
6163                 return -EBUSY;
6164         }
6165
6166         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6167          * resources are distributed equally across the maximum number of VFs.
6168          * The user may request that only a subset of the max VFs be enabled.
6169          * Based on num_vfs, redistribute the resources across num_vfs so that
6170          * each VF gets a larger share of the resources.
6171          * This facility is not available in BE3 FW; on Lancer chips it is
6172          * done by the FW itself.
6173          */
6174         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6175                 be_calculate_vf_res(adapter, adapter->num_vfs,
6176                                     &vft_res);
6177                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6178                                                  adapter->num_vfs, &vft_res);
6179                 if (status)
6180                         dev_err(&pdev->dev,
6181                                 "Failed to optimize SR-IOV resources\n");
6182         }
6183
6184         status = be_get_resources(adapter);
6185         if (status)
6186                 return be_cmd_status(status);
6187
6188         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6189         rtnl_lock();
6190         status = be_update_queues(adapter);
6191         rtnl_unlock();
6192         if (status)
6193                 return be_cmd_status(status);
6194
6195         if (adapter->num_vfs)
6196                 status = be_vf_setup(adapter);
6197
6198         if (!status)
6199                 return adapter->num_vfs;
6200
6201         return 0;
6202 }
6203
6204 static const struct pci_error_handlers be_eeh_handlers = {
6205         .error_detected = be_eeh_err_detected,
6206         .slot_reset = be_eeh_reset,
6207         .resume = be_eeh_resume,
6208 };
6209
6210 static struct pci_driver be_driver = {
6211         .name = DRV_NAME,
6212         .id_table = be_dev_ids,
6213         .probe = be_probe,
6214         .remove = be_remove,
6215         .suspend = be_suspend,
6216         .resume = be_pci_resume,
6217         .shutdown = be_shutdown,
6218         .sriov_configure = be_pci_sriov_configure,
6219         .err_handler = &be_eeh_handlers
6220 };
6221
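/* Module init: validate the rx_frag_size parameter, create the shared
 * workqueues and register the PCI driver.
 */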
6222 static int __init be_init_module(void)
6223 {
6224         int status;
6225
6226         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6227             rx_frag_size != 2048) {
6228                 printk(KERN_WARNING DRV_NAME
6229                         " : Module param rx_frag_size must be 2048/4096/8192."
6230                         " Using 2048\n");
6231                 rx_frag_size = 2048;
6232         }
6233
6234         if (num_vfs > 0) {
6235                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6236                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6237         }
6238
6239         be_wq = create_singlethread_workqueue("be_wq");
6240         if (!be_wq) {
6241                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6242                 return -ENOMEM;
6243         }
6244
6245         be_err_recovery_workq =
6246                 create_singlethread_workqueue("be_err_recover");
6247         if (!be_err_recovery_workq)
6248                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6249
6250         status = pci_register_driver(&be_driver);
6251         if (status) {
6252                 destroy_workqueue(be_wq);
6253                 be_destroy_err_recovery_workq();
6254         }
6255         return status;
6256 }
6257 module_init(be_init_module);
6258
6259 static void __exit be_exit_module(void)
6260 {
6261         pci_unregister_driver(&be_driver);
6262
6263         be_destroy_err_recovery_workq();
6264
6265         if (be_wq)
6266                 destroy_workqueue(be_wq);
6267 }
6268 module_exit(be_exit_module);