GNU Linux-libre 4.14.332-gnu1
drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
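/* Allocate DMA-coherent, zeroed backing memory for a queue of @len entries,
 * each @entry_size bytes long, and record the queue geometry in @q.
 */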
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
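/* Ring the RX-queue doorbell to hand @posted buffer descriptors on ring @qid
 * to the HW. The wmb() ensures the descriptors are visible in memory before
 * the doorbell write.
 */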
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
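/* Ring the event-queue doorbell. The doorbell word carries the EQ id, the
 * re-arm and clear-interrupt flags, the number of events consumed
 * (@num_popped) and an encoded interrupt-delay multiplier.
 */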
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * own MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
333          * privilege or if PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK, only if the PF programmed
336          * the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide whether the new MAC was successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF didn't pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
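/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit SW accumulator:
 * the low 16 bits track the last HW value, the high 16 bits count wraps.
 * E.g. with *acc = 0x0001fff0, a new val of 0x0005 is detected as a wrap and
 * the accumulator becomes 0x00020005.
 */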
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         ACCESS_ONCE(*acc) = newacc;
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* the erx HW counter below can actually wrap around after
618                  * 65535. The driver accumulates it into a 32-bit value.
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
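/* ndo_get_stats64 handler: sum the per-queue SW packet/byte counters (using
 * the u64_stats seqcount for a consistent snapshot) and map the FW/driver
 * error counters onto the rtnl_link_stats64 fields.
 */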
650 static void be_get_stats64(struct net_device *netdev,
651                            struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715 }
716
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719         struct net_device *netdev = adapter->netdev;
720
721         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722                 netif_carrier_off(netdev);
723                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724         }
725
726         if (link_status)
727                 netif_carrier_on(netdev);
728         else
729                 netif_carrier_off(netdev);
730
731         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736         if (skb->encapsulation)
737                 return skb_inner_transport_offset(skb) +
738                        inner_tcp_hdrlen(skb);
739         return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744         struct be_tx_stats *stats = tx_stats(txo);
745         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746         /* Account for headers which get duplicated in TSO pkt */
747         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748
749         u64_stats_update_begin(&stats->sync);
750         stats->tx_reqs++;
751         stats->tx_bytes += skb->len + dup_hdr_len;
752         stats->tx_pkts += tx_pkts;
753         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754                 stats->tx_vxlan_offload_pkts += tx_pkts;
755         u64_stats_update_end(&stats->sync);
756 }
757
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761         /* +1 for the header wrb */
762         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770         wrb->rsvd0 = 0;
771 }
772
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778         wrb->frag_pa_hi = 0;
779         wrb->frag_pa_lo = 0;
780         wrb->frag_len = 0;
781         wrb->rsvd0 = 0;
782 }
783
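/* Return the VLAN tag to place in the TX WRB. If the 802.1p priority given
 * by the stack is not in the adapter's available priority bitmap, replace it
 * with the FW-recommended priority bits.
 */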
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785                                      struct sk_buff *skb)
786 {
787         u8 vlan_prio;
788         u16 vlan_tag;
789
790         vlan_tag = skb_vlan_tag_get(skb);
791         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792         /* If vlan priority provided by OS is NOT in available bmap */
793         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795                                 adapter->recommended_prio_bits;
796
797         return vlan_tag;
798 }
799
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803         return (inner_ip_hdr(skb)->version == 4) ?
804                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809         return (ip_hdr(skb)->version == 4) ?
810                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812
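/* The TXQ is treated as full when a maximally-fragmented packet
 * (BE_MAX_TX_FRAG_COUNT WRBs) can no longer be accommodated.
 */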
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820         return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827
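/* Translate the skb's offload requests (TSO, checksum, VLAN) into the WRB
 * feature flags later used to build the TX WRB header.
 */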
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829                                        struct sk_buff *skb,
830                                        struct be_wrb_params *wrb_params)
831 {
832         u16 proto;
833
834         if (skb_is_gso(skb)) {
835                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
836                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840                 if (skb->encapsulation) {
841                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842                         proto = skb_inner_ip_proto(skb);
843                 } else {
844                         proto = skb_ip_proto(skb);
845                 }
846                 if (proto == IPPROTO_TCP)
847                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848                 else if (proto == IPPROTO_UDP)
849                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850         }
851
852         if (skb_vlan_tag_present(skb)) {
853                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855         }
856
857         BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861                          struct be_eth_hdr_wrb *hdr,
862                          struct be_wrb_params *wrb_params,
863                          struct sk_buff *skb)
864 {
865         memset(hdr, 0, sizeof(*hdr));
866
867         SET_TX_WRB_HDR_BITS(crc, hdr,
868                             BE_WRB_F_GET(wrb_params->features, CRC));
869         SET_TX_WRB_HDR_BITS(ipcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, IPCS));
871         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, TCPCS));
873         SET_TX_WRB_HDR_BITS(udpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, UDPCS));
875
876         SET_TX_WRB_HDR_BITS(lso, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO));
878         SET_TX_WRB_HDR_BITS(lso6, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO6));
880         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881
882         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
883          * hack is not needed, the evt bit is set while ringing the DB.
884          */
885         SET_TX_WRB_HDR_BITS(event, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887         SET_TX_WRB_HDR_BITS(vlan, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN));
889         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890
891         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893         SET_TX_WRB_HDR_BITS(mgmt, hdr,
894                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898                           bool unmap_single)
899 {
900         dma_addr_t dma;
901         u32 frag_len = le32_to_cpu(wrb->frag_len);
902
903
904         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905                 (u64)le32_to_cpu(wrb->frag_pa_lo);
906         if (frag_len) {
907                 if (unmap_single)
908                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909                 else
910                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911         }
912 }
913
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917         u32 head = txo->q.head;
918
919         queue_head_inc(&txo->q);
920         return head;
921 }
922
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925                                 struct be_tx_obj *txo,
926                                 struct be_wrb_params *wrb_params,
927                                 struct sk_buff *skb, u16 head)
928 {
929         u32 num_frags = skb_wrb_cnt(skb);
930         struct be_queue_info *txq = &txo->q;
931         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932
933         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934         be_dws_cpu_to_le(hdr, sizeof(*hdr));
935
936         BUG_ON(txo->sent_skb_list[head]);
937         txo->sent_skb_list[head] = skb;
938         txo->last_req_hdr = head;
939         atomic_add(num_frags, &txq->used);
940         txo->last_req_wrb_cnt = num_frags;
941         txo->pend_wrb_cnt += num_frags;
942 }
943
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946                                  int len)
947 {
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         wrb = queue_head_node(txq);
952         wrb_fill(wrb, busaddr, len);
953         queue_head_inc(txq);
954 }
955
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
957  * was invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961                             struct be_tx_obj *txo, u32 head, bool map_single,
962                             u32 copied)
963 {
964         struct device *dev;
965         struct be_eth_wrb *wrb;
966         struct be_queue_info *txq = &txo->q;
967
968         dev = &adapter->pdev->dev;
969         txq->head = head;
970
971         /* skip the first wrb (hdr); it's not mapped */
972         queue_head_inc(txq);
973         while (copied) {
974                 wrb = queue_head_node(txq);
975                 unmap_tx_frag(dev, wrb, map_single);
976                 map_single = false;
977                 copied -= le32_to_cpu(wrb->frag_len);
978                 queue_head_inc(txq);
979         }
980
981         txq->head = head;
982 }
983
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989                            struct sk_buff *skb,
990                            struct be_wrb_params *wrb_params)
991 {
992         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993         struct device *dev = &adapter->pdev->dev;
994         struct be_queue_info *txq = &txo->q;
995         bool map_single = false;
996         u32 head = txq->head;
997         dma_addr_t busaddr;
998         int len;
999
1000         head = be_tx_get_wrb_hdr(txo);
1001
1002         if (skb->len > skb->data_len) {
1003                 len = skb_headlen(skb);
1004
1005                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006                 if (dma_mapping_error(dev, busaddr))
1007                         goto dma_err;
1008                 map_single = true;
1009                 be_tx_setup_wrb_frag(txo, busaddr, len);
1010                 copied += len;
1011         }
1012
1013         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015                 len = skb_frag_size(frag);
1016
1017                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018                 if (dma_mapping_error(dev, busaddr))
1019                         goto dma_err;
1020                 be_tx_setup_wrb_frag(txo, busaddr, len);
1021                 copied += len;
1022         }
1023
1024         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025
1026         be_tx_stats_update(txo, skb);
1027         return wrb_cnt;
1028
1029 dma_err:
1030         adapter->drv_stats.dma_map_errors++;
1031         be_xmit_restore(adapter, txo, head, map_single, copied);
1032         return 0;
1033 }
1034
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039
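/* Insert the VLAN tag(s) directly into the packet data instead of relying on
 * HW tagging: used for the pvid/QnQ workarounds. Also inserts the outer QnQ
 * VLAN when one is configured, and sets VLAN_SKIP_HW accordingly.
 */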
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041                                              struct sk_buff *skb,
1042                                              struct be_wrb_params
1043                                              *wrb_params)
1044 {
1045         u16 vlan_tag = 0;
1046
1047         skb = skb_share_check(skb, GFP_ATOMIC);
1048         if (unlikely(!skb))
1049                 return skb;
1050
1051         if (skb_vlan_tag_present(skb))
1052                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053
1054         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055                 if (!vlan_tag)
1056                         vlan_tag = adapter->pvid;
1057                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1058                  * to skip VLAN insertion
1059                  */
1060                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061         }
1062
1063         if (vlan_tag) {
1064                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065                                                 vlan_tag);
1066                 if (unlikely(!skb))
1067                         return skb;
1068                 skb->vlan_tci = 0;
1069         }
1070
1071         /* Insert the outer VLAN, if any */
1072         if (adapter->qnq_vid) {
1073                 vlan_tag = adapter->qnq_vid;
1074                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075                                                 vlan_tag);
1076                 if (unlikely(!skb))
1077                         return skb;
1078                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079         }
1080
1081         return skb;
1082 }
1083
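/* Detect IPv6 packets that carry an extension header (the header following
 * the IPv6 header is neither TCP nor UDP); such packets can trigger the
 * HW VLAN-tagging lockup worked around in the TX path below.
 */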
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086         struct ethhdr *eh = (struct ethhdr *)skb->data;
1087         u16 offset = ETH_HLEN;
1088
1089         if (eh->h_proto == htons(ETH_P_IPV6)) {
1090                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091
1092                 offset += sizeof(struct ipv6hdr);
1093                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1094                     ip6h->nexthdr != NEXTHDR_UDP) {
1095                         struct ipv6_opt_hdr *ehdr =
1096                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1097
1098                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099                         if (ehdr->hdrlen == 0xff)
1100                                 return true;
1101                 }
1102         }
1103         return false;
1104 }
1105
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117                                                   struct sk_buff *skb,
1118                                                   struct be_wrb_params
1119                                                   *wrb_params)
1120 {
1121         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122         unsigned int eth_hdr_len;
1123         struct iphdr *ip;
1124
1125         /* For padded packets, BE HW modifies the tot_len field in the IP
1126          * header incorrectly when a VLAN tag is inserted by HW.
1127          * For padded packets, Lancer computes incorrect checksum.
1128          */
1129         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130                                                 VLAN_ETH_HLEN : ETH_HLEN;
1131         if (skb->len <= 60 &&
1132             (lancer_chip(adapter) || BE3_chip(adapter) ||
1133              skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1134                 ip = (struct iphdr *)ip_hdr(skb);
1135                 if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1136                         goto tx_drop;
1137         }
1138
1139         /* If vlan tag is already inlined in the packet, skip HW VLAN
1140          * tagging in pvid-tagging mode
1141          */
1142         if (be_pvid_tagging_enabled(adapter) &&
1143             veh->h_vlan_proto == htons(ETH_P_8021Q))
1144                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1145
1146         /* HW has a bug wherein it will calculate CSUM for VLAN
1147          * pkts even though CSUM offload is disabled.
1148          * Manually insert the VLAN tag in the pkt.
1149          */
1150         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1151             skb_vlan_tag_present(skb)) {
1152                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1153                 if (unlikely(!skb))
1154                         goto err;
1155         }
1156
1157         /* HW may lockup when VLAN HW tagging is requested on
1158          * certain ipv6 packets. Drop such pkts if the HW workaround to
1159          * skip HW tagging is not enabled by FW.
1160          */
1161         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1162                      (adapter->pvid || adapter->qnq_vid) &&
1163                      !qnq_async_evt_rcvd(adapter)))
1164                 goto tx_drop;
1165
1166         /* Manual VLAN tag insertion to prevent:
1167          * ASIC lockup when the ASIC inserts VLAN tag into
1168          * certain ipv6 packets. Insert VLAN tags in driver,
1169          * and set event, completion, vlan bits accordingly
1170          * in the Tx WRB.
1171          */
1172         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1173             be_vlan_tag_tx_chk(adapter, skb)) {
1174                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1175                 if (unlikely(!skb))
1176                         goto err;
1177         }
1178
1179         return skb;
1180 tx_drop:
1181         dev_kfree_skb_any(skb);
1182 err:
1183         return NULL;
1184 }
1185
1186 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1187                                            struct sk_buff *skb,
1188                                            struct be_wrb_params *wrb_params)
1189 {
1190         int err;
1191
1192         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1193          * packets that are 32 bytes or less may cause a transmit stall
1194          * on that port. The workaround is to pad such packets
1195          * (len <= 32 bytes) to a minimum length of 36 bytes.
1196          */
1197         if (skb->len <= 32) {
1198                 if (skb_put_padto(skb, 36))
1199                         return NULL;
1200         }
1201
1202         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1203                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1204                 if (!skb)
1205                         return NULL;
1206         }
1207
1208         /* The stack can send us skbs with length greater than
1209          * what the HW can handle. Trim the extra bytes.
1210          */
1211         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1212         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1213         WARN_ON(err);
1214
1215         return skb;
1216 }
1217
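/* Notify the HW of all WRBs queued so far on this TXQ. The last request is
 * marked eventable and, on BE chips, a dummy WRB is added if needed so that
 * an even number of WRBs is notified.
 */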
1218 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1219 {
1220         struct be_queue_info *txq = &txo->q;
1221         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1222
1223         /* Mark the last request eventable if it hasn't been marked already */
1224         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1225                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1226
1227         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1228         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1229                 wrb_fill_dummy(queue_head_node(txq));
1230                 queue_head_inc(txq);
1231                 atomic_inc(&txq->used);
1232                 txo->pend_wrb_cnt++;
1233                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1234                                            TX_HDR_WRB_NUM_SHIFT);
1235                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1236                                           TX_HDR_WRB_NUM_SHIFT);
1237         }
1238         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1239         txo->pend_wrb_cnt = 0;
1240 }
1241
1242 /* OS2BMC related */
1243
1244 #define DHCP_CLIENT_PORT        68
1245 #define DHCP_SERVER_PORT        67
1246 #define NET_BIOS_PORT1          137
1247 #define NET_BIOS_PORT2          138
1248 #define DHCPV6_RAS_PORT         547
1249
1250 #define is_mc_allowed_on_bmc(adapter, eh)       \
1251         (!is_multicast_filt_enabled(adapter) && \
1252          is_multicast_ether_addr(eh->h_dest) && \
1253          !is_broadcast_ether_addr(eh->h_dest))
1254
1255 #define is_bc_allowed_on_bmc(adapter, eh)       \
1256         (!is_broadcast_filt_enabled(adapter) && \
1257          is_broadcast_ether_addr(eh->h_dest))
1258
1259 #define is_arp_allowed_on_bmc(adapter, skb)     \
1260         (is_arp(skb) && is_arp_filt_enabled(adapter))
1261
1262 #define is_broadcast_packet(eh, adapter)        \
1263                 (is_multicast_ether_addr(eh->h_dest) && \
1264                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1265
1266 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1267
1268 #define is_arp_filt_enabled(adapter)    \
1269                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1270
1271 #define is_dhcp_client_filt_enabled(adapter)    \
1272                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1273
1274 #define is_dhcp_srvr_filt_enabled(adapter)      \
1275                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1276
1277 #define is_nbios_filt_enabled(adapter)  \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1279
1280 #define is_ipv6_na_filt_enabled(adapter)        \
1281                 (adapter->bmc_filt_mask &       \
1282                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1283
1284 #define is_ipv6_ra_filt_enabled(adapter)        \
1285                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1286
1287 #define is_ipv6_ras_filt_enabled(adapter)       \
1288                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1289
1290 #define is_broadcast_filt_enabled(adapter)      \
1291                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1292
1293 #define is_multicast_filt_enabled(adapter)      \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1295
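/* Decide whether a copy of this TX packet must also be delivered to the BMC
 * (OS2BMC), based on its destination address, packet type and the BMC filter
 * mask. If so, any VLAN tag is inlined in the packet before it is re-queued.
 */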
1296 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1297                                struct sk_buff **skb)
1298 {
1299         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1300         bool os2bmc = false;
1301
1302         if (!be_is_os2bmc_enabled(adapter))
1303                 goto done;
1304
1305         if (!is_multicast_ether_addr(eh->h_dest))
1306                 goto done;
1307
1308         if (is_mc_allowed_on_bmc(adapter, eh) ||
1309             is_bc_allowed_on_bmc(adapter, eh) ||
1310             is_arp_allowed_on_bmc(adapter, (*skb))) {
1311                 os2bmc = true;
1312                 goto done;
1313         }
1314
1315         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1316                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1317                 u8 nexthdr = hdr->nexthdr;
1318
1319                 if (nexthdr == IPPROTO_ICMPV6) {
1320                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1321
1322                         switch (icmp6->icmp6_type) {
1323                         case NDISC_ROUTER_ADVERTISEMENT:
1324                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1325                                 goto done;
1326                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1327                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1328                                 goto done;
1329                         default:
1330                                 break;
1331                         }
1332                 }
1333         }
1334
1335         if (is_udp_pkt((*skb))) {
1336                 struct udphdr *udp = udp_hdr((*skb));
1337
1338                 switch (ntohs(udp->dest)) {
1339                 case DHCP_CLIENT_PORT:
1340                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1341                         goto done;
1342                 case DHCP_SERVER_PORT:
1343                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1344                         goto done;
1345                 case NET_BIOS_PORT1:
1346                 case NET_BIOS_PORT2:
1347                         os2bmc = is_nbios_filt_enabled(adapter);
1348                         goto done;
1349                 case DHCPV6_RAS_PORT:
1350                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1351                         goto done;
1352                 default:
1353                         break;
1354                 }
1355         }
1356 done:
1357         /* For VLAN packets destined to the BMC, the ASIC expects
1358          * the VLAN tag to be inline in the packet.
1359          */
1360         if (os2bmc)
1361                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1362
1363         return os2bmc;
1364 }
1365
1366 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1367 {
1368         struct be_adapter *adapter = netdev_priv(netdev);
1369         u16 q_idx = skb_get_queue_mapping(skb);
1370         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1371         struct be_wrb_params wrb_params = { 0 };
1372         bool flush = !skb->xmit_more;
1373         u16 wrb_cnt;
1374
1375         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1376         if (unlikely(!skb))
1377                 goto drop;
1378
1379         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1380
1381         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1382         if (unlikely(!wrb_cnt)) {
1383                 dev_kfree_skb_any(skb);
1384                 goto drop;
1385         }
1386
1387         /* If OS2BMC is enabled and the packet is destined to the BMC,
1388          * enqueue the packet a second time with the mgmt bit set.
1389          */
1390         if (be_send_pkt_to_bmc(adapter, &skb)) {
1391                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1392                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393                 if (unlikely(!wrb_cnt))
1394                         goto drop;
1395                 else
1396                         skb_get(skb);
1397         }
1398
1399         if (be_is_txq_full(txo)) {
1400                 netif_stop_subqueue(netdev, q_idx);
1401                 tx_stats(txo)->tx_stops++;
1402         }
1403
1404         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1405                 be_xmit_flush(adapter, txo);
1406
1407         return NETDEV_TX_OK;
1408 drop:
1409         tx_stats(txo)->tx_drv_drops++;
1410         /* Flush the already enqueued tx requests */
1411         if (flush && txo->pend_wrb_cnt)
1412                 be_xmit_flush(adapter, txo);
1413
1414         return NETDEV_TX_OK;
1415 }
1416
1417 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1418 {
1419         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1420                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1421 }
1422
1423 static int be_set_vlan_promisc(struct be_adapter *adapter)
1424 {
1425         struct device *dev = &adapter->pdev->dev;
1426         int status;
1427
1428         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1429                 return 0;
1430
1431         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1432         if (!status) {
1433                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1434                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1435         } else {
1436                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1437         }
1438         return status;
1439 }
1440
1441 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1442 {
1443         struct device *dev = &adapter->pdev->dev;
1444         int status;
1445
1446         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1447         if (!status) {
1448                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1449                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1450         }
1451         return status;
1452 }
1453
1454 /*
1455  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1456  * If the user configures more, place BE in vlan promiscuous mode.
1457  */
1458 static int be_vid_config(struct be_adapter *adapter)
1459 {
1460         struct device *dev = &adapter->pdev->dev;
1461         u16 vids[BE_NUM_VLANS_SUPPORTED];
1462         u16 num = 0, i = 0;
1463         int status = 0;
1464
1465         /* No need to change the VLAN state if the I/F is in promiscuous */
1466         if (adapter->netdev->flags & IFF_PROMISC)
1467                 return 0;
1468
1469         if (adapter->vlans_added > be_max_vlans(adapter))
1470                 return be_set_vlan_promisc(adapter);
1471
1472         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1473                 status = be_clear_vlan_promisc(adapter);
1474                 if (status)
1475                         return status;
1476         }
1477         /* Construct VLAN Table to give to HW */
1478         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1479                 vids[num++] = cpu_to_le16(i);
1480
1481         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1482         if (status) {
1483                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1484                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1485                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1486                     addl_status(status) ==
1487                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1488                         return be_set_vlan_promisc(adapter);
1489         }
1490         return status;
1491 }
1492
1493 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1494 {
1495         struct be_adapter *adapter = netdev_priv(netdev);
1496         int status = 0;
1497
1498         mutex_lock(&adapter->rx_filter_lock);
1499
1500         /* Packets with VID 0 are always received by Lancer by default */
1501         if (lancer_chip(adapter) && vid == 0)
1502                 goto done;
1503
1504         if (test_bit(vid, adapter->vids))
1505                 goto done;
1506
1507         set_bit(vid, adapter->vids);
1508         adapter->vlans_added++;
1509
1510         status = be_vid_config(adapter);
1511 done:
1512         mutex_unlock(&adapter->rx_filter_lock);
1513         return status;
1514 }
1515
1516 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1517 {
1518         struct be_adapter *adapter = netdev_priv(netdev);
1519         int status = 0;
1520
1521         mutex_lock(&adapter->rx_filter_lock);
1522
1523         /* Packets with VID 0 are always received by Lancer by default */
1524         if (lancer_chip(adapter) && vid == 0)
1525                 goto done;
1526
1527         if (!test_bit(vid, adapter->vids))
1528                 goto done;
1529
1530         clear_bit(vid, adapter->vids);
1531         adapter->vlans_added--;
1532
1533         status = be_vid_config(adapter);
1534 done:
1535         mutex_unlock(&adapter->rx_filter_lock);
1536         return status;
1537 }
1538
1539 static void be_set_all_promisc(struct be_adapter *adapter)
1540 {
1541         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1542         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1543 }
1544
1545 static void be_set_mc_promisc(struct be_adapter *adapter)
1546 {
1547         int status;
1548
1549         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1550                 return;
1551
1552         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1553         if (!status)
1554                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1555 }
1556
1557 static void be_set_uc_promisc(struct be_adapter *adapter)
1558 {
1559         int status;
1560
1561         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1562                 return;
1563
1564         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1565         if (!status)
1566                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1567 }
1568
1569 static void be_clear_uc_promisc(struct be_adapter *adapter)
1570 {
1571         int status;
1572
1573         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1574                 return;
1575
1576         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1577         if (!status)
1578                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1579 }
1580
1581 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1582  * We use a single callback for both sync and unsync. We don't actually
1583  * add/remove addresses through these callbacks; we only use them to detect
1584  * changes to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1585  */
1586 static int be_uc_list_update(struct net_device *netdev,
1587                              const unsigned char *addr)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590
1591         adapter->update_uc_list = true;
1592         return 0;
1593 }
1594
1595 static int be_mc_list_update(struct net_device *netdev,
1596                              const unsigned char *addr)
1597 {
1598         struct be_adapter *adapter = netdev_priv(netdev);
1599
1600         adapter->update_mc_list = true;
1601         return 0;
1602 }
1603
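/* Re-program the multicast filter.  The netdev mc list is synced and cached
 * under netif_addr_lock; nothing changes while the interface is in promisc
 * mode, mc-promiscuous mode is used when IFF_ALLMULTI is set or the list
 * exceeds what the interface supports, and otherwise the cached list is
 * programmed (clearing mc-promisc if it was previously on).
 */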
1604 static void be_set_mc_list(struct be_adapter *adapter)
1605 {
1606         struct net_device *netdev = adapter->netdev;
1607         struct netdev_hw_addr *ha;
1608         bool mc_promisc = false;
1609         int status;
1610
1611         netif_addr_lock_bh(netdev);
1612         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1613
1614         if (netdev->flags & IFF_PROMISC) {
1615                 adapter->update_mc_list = false;
1616         } else if (netdev->flags & IFF_ALLMULTI ||
1617                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1618                 /* Enable multicast promisc if num configured exceeds
1619                  * what we support
1620                  */
1621                 mc_promisc = true;
1622                 adapter->update_mc_list = false;
1623         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1624                 /* Update mc-list unconditionally if the iface was previously
1625                  * in mc-promisc mode and now is out of that mode.
1626                  */
1627                 adapter->update_mc_list = true;
1628         }
1629
1630         if (adapter->update_mc_list) {
1631                 int i = 0;
1632
1633                 /* cache the mc-list in adapter */
1634                 netdev_for_each_mc_addr(ha, netdev) {
1635                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1636                         i++;
1637                 }
1638                 adapter->mc_count = netdev_mc_count(netdev);
1639         }
1640         netif_addr_unlock_bh(netdev);
1641
1642         if (mc_promisc) {
1643                 be_set_mc_promisc(adapter);
1644         } else if (adapter->update_mc_list) {
1645                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1646                 if (!status)
1647                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1648                 else
1649                         be_set_mc_promisc(adapter);
1650
1651                 adapter->update_mc_list = false;
1652         }
1653 }
1654
1655 static void be_clear_mc_list(struct be_adapter *adapter)
1656 {
1657         struct net_device *netdev = adapter->netdev;
1658
1659         __dev_mc_unsync(netdev, NULL);
1660         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1661         adapter->mc_count = 0;
1662 }
1663
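/* Add the cached uc-list entry at uc_idx as a MAC filter.  If it equals the
 * interface's own MAC (dev_mac), reuse pmac_id[0] rather than consuming an
 * extra pmac entry.
 */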
1664 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1665 {
1666         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1667                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668                 return 0;
1669         }
1670
1671         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1672                                adapter->if_handle,
1673                                &adapter->pmac_id[uc_idx + 1], 0);
1674 }
1675
1676 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1677 {
1678         if (pmac_id == adapter->pmac_id[0])
1679                 return;
1680
1681         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1682 }
1683
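/* Re-program the unicast filters, mirroring be_set_mc_list(): sync and cache
 * the netdev uc list, fall back to uc-promiscuous mode when the list exceeds
 * the available pmac entries, otherwise delete the previously programmed
 * pmac entries and add the current list.
 */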
1684 static void be_set_uc_list(struct be_adapter *adapter)
1685 {
1686         struct net_device *netdev = adapter->netdev;
1687         struct netdev_hw_addr *ha;
1688         bool uc_promisc = false;
1689         int curr_uc_macs = 0, i;
1690
1691         netif_addr_lock_bh(netdev);
1692         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1693
1694         if (netdev->flags & IFF_PROMISC) {
1695                 adapter->update_uc_list = false;
1696         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1697                 uc_promisc = true;
1698                 adapter->update_uc_list = false;
1699         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1700                 /* Update uc-list unconditionally if the iface was previously
1701                  * in uc-promisc mode and now is out of that mode.
1702                  */
1703                 adapter->update_uc_list = true;
1704         }
1705
1706         if (adapter->update_uc_list) {
1707                 /* cache the uc-list in adapter array */
1708                 i = 0;
1709                 netdev_for_each_uc_addr(ha, netdev) {
1710                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1711                         i++;
1712                 }
1713                 curr_uc_macs = netdev_uc_count(netdev);
1714         }
1715         netif_addr_unlock_bh(netdev);
1716
1717         if (uc_promisc) {
1718                 be_set_uc_promisc(adapter);
1719         } else if (adapter->update_uc_list) {
1720                 be_clear_uc_promisc(adapter);
1721
1722                 for (i = 0; i < adapter->uc_macs; i++)
1723                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1724
1725                 for (i = 0; i < curr_uc_macs; i++)
1726                         be_uc_mac_add(adapter, i);
1727                 adapter->uc_macs = curr_uc_macs;
1728                 adapter->update_uc_list = false;
1729         }
1730 }
1731
1732 static void be_clear_uc_list(struct be_adapter *adapter)
1733 {
1734         struct net_device *netdev = adapter->netdev;
1735         int i;
1736
1737         __dev_uc_unsync(netdev, NULL);
1738         for (i = 0; i < adapter->uc_macs; i++)
1739                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1740
1741         adapter->uc_macs = 0;
1742 }
1743
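/* Re-program the RX filter state under rx_filter_lock: enter all-promiscuous
 * mode when IFF_PROMISC is set, re-program the VLAN list when coming out of
 * it, and then refresh the unicast and multicast filter lists.
 */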
1744 static void __be_set_rx_mode(struct be_adapter *adapter)
1745 {
1746         struct net_device *netdev = adapter->netdev;
1747
1748         mutex_lock(&adapter->rx_filter_lock);
1749
1750         if (netdev->flags & IFF_PROMISC) {
1751                 if (!be_in_all_promisc(adapter))
1752                         be_set_all_promisc(adapter);
1753         } else if (be_in_all_promisc(adapter)) {
1754                 /* We need to re-program the vlan-list or clear
1755                  * vlan-promisc mode (if needed) when the interface
1756                  * comes out of promisc mode.
1757                  */
1758                 be_vid_config(adapter);
1759         }
1760
1761         be_set_uc_list(adapter);
1762         be_set_mc_list(adapter);
1763
1764         mutex_unlock(&adapter->rx_filter_lock);
1765 }
1766
1767 static void be_work_set_rx_mode(struct work_struct *work)
1768 {
1769         struct be_cmd_work *cmd_work =
1770                                 container_of(work, struct be_cmd_work, work);
1771
1772         __be_set_rx_mode(cmd_work->adapter);
1773         kfree(cmd_work);
1774 }
1775
1776 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1777 {
1778         struct be_adapter *adapter = netdev_priv(netdev);
1779         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1780         int status;
1781
1782         if (!sriov_enabled(adapter))
1783                 return -EPERM;
1784
1785         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1786                 return -EINVAL;
1787
1788         /* Proceed further only if user provided MAC is different
1789          * from active MAC
1790          */
1791         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1792                 return 0;
1793
1794         if (BEx_chip(adapter)) {
1795                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1796                                 vf + 1);
1797
1798                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1799                                          &vf_cfg->pmac_id, vf + 1);
1800         } else {
1801                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1802                                         vf + 1);
1803         }
1804
1805         if (status) {
1806                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1807                         mac, vf, status);
1808                 return be_cmd_status(status);
1809         }
1810
1811         ether_addr_copy(vf_cfg->mac_addr, mac);
1812
1813         return 0;
1814 }
1815
1816 static int be_get_vf_config(struct net_device *netdev, int vf,
1817                             struct ifla_vf_info *vi)
1818 {
1819         struct be_adapter *adapter = netdev_priv(netdev);
1820         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1821
1822         if (!sriov_enabled(adapter))
1823                 return -EPERM;
1824
1825         if (vf >= adapter->num_vfs)
1826                 return -EINVAL;
1827
1828         vi->vf = vf;
1829         vi->max_tx_rate = vf_cfg->tx_rate;
1830         vi->min_tx_rate = 0;
1831         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1832         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1833         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1834         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1835         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1836
1837         return 0;
1838 }
1839
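/* Enable Transparent VLAN Tagging (TVT) for a VF: program the given VLAN via
 * the hsw config, clear any guest-programmed VLAN filters on the VF's
 * interface, and revoke the VF's FILTMGMT privilege so it cannot program
 * VLAN filters of its own while TVT is in effect.
 */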
1840 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1841 {
1842         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1843         u16 vids[BE_NUM_VLANS_SUPPORTED];
1844         int vf_if_id = vf_cfg->if_handle;
1845         int status;
1846
1847         /* Enable Transparent VLAN Tagging */
1848         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1849         if (status)
1850                 return status;
1851
1852         /* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1853         vids[0] = 0;
1854         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1855         if (!status)
1856                 dev_info(&adapter->pdev->dev,
1857                          "Cleared guest VLANs on VF%d", vf);
1858
1859         /* After TVT is enabled, disallow VFs to program VLAN filters */
1860         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1861                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1862                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1863                 if (!status)
1864                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1865         }
1866         return 0;
1867 }
1868
1869 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1870 {
1871         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1872         struct device *dev = &adapter->pdev->dev;
1873         int status;
1874
1875         /* Reset Transparent VLAN Tagging. */
1876         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1877                                        vf_cfg->if_handle, 0, 0);
1878         if (status)
1879                 return status;
1880
1881         /* Allow VFs to program VLAN filtering */
1882         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1883                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1884                                                   BE_PRIV_FILTMGMT, vf + 1);
1885                 if (!status) {
1886                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1887                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1888                 }
1889         }
1890
1891         dev_info(dev,
1892                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1893         return 0;
1894 }
1895
1896 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1897                           __be16 vlan_proto)
1898 {
1899         struct be_adapter *adapter = netdev_priv(netdev);
1900         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901         int status;
1902
1903         if (!sriov_enabled(adapter))
1904                 return -EPERM;
1905
1906         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1907                 return -EINVAL;
1908
1909         if (vlan_proto != htons(ETH_P_8021Q))
1910                 return -EPROTONOSUPPORT;
1911
1912         if (vlan || qos) {
1913                 vlan |= qos << VLAN_PRIO_SHIFT;
1914                 status = be_set_vf_tvt(adapter, vf, vlan);
1915         } else {
1916                 status = be_clear_vf_tvt(adapter, vf);
1917         }
1918
1919         if (status) {
1920                 dev_err(&adapter->pdev->dev,
1921                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1922                         status);
1923                 return be_cmd_status(status);
1924         }
1925
1926         vf_cfg->vlan_tag = vlan;
1927         return 0;
1928 }
1929
1930 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1931                              int min_tx_rate, int max_tx_rate)
1932 {
1933         struct be_adapter *adapter = netdev_priv(netdev);
1934         struct device *dev = &adapter->pdev->dev;
1935         int percent_rate, status = 0;
1936         u16 link_speed = 0;
1937         u8 link_status;
1938
1939         if (!sriov_enabled(adapter))
1940                 return -EPERM;
1941
1942         if (vf >= adapter->num_vfs)
1943                 return -EINVAL;
1944
1945         if (min_tx_rate)
1946                 return -EINVAL;
1947
1948         if (!max_tx_rate)
1949                 goto config_qos;
1950
1951         status = be_cmd_link_status_query(adapter, &link_speed,
1952                                           &link_status, 0);
1953         if (status)
1954                 goto err;
1955
1956         if (!link_status) {
1957                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1958                 status = -ENETDOWN;
1959                 goto err;
1960         }
1961
1962         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1963                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1964                         link_speed);
1965                 status = -EINVAL;
1966                 goto err;
1967         }
1968
1969         /* On Skyhawk the QOS setting must be done only as a % value */
1970         percent_rate = link_speed / 100;
1971         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1972                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1973                         percent_rate);
1974                 status = -EINVAL;
1975                 goto err;
1976         }
1977
1978 config_qos:
1979         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1980         if (status)
1981                 goto err;
1982
1983         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1984         return 0;
1985
1986 err:
1987         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1988                 max_tx_rate, vf);
1989         return be_cmd_status(status);
1990 }
1991
1992 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1993                                 int link_state)
1994 {
1995         struct be_adapter *adapter = netdev_priv(netdev);
1996         int status;
1997
1998         if (!sriov_enabled(adapter))
1999                 return -EPERM;
2000
2001         if (vf >= adapter->num_vfs)
2002                 return -EINVAL;
2003
2004         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2005         if (status) {
2006                 dev_err(&adapter->pdev->dev,
2007                         "Link state change on VF %d failed: %#x\n", vf, status);
2008                 return be_cmd_status(status);
2009         }
2010
2011         adapter->vf_cfg[vf].plink_tracking = link_state;
2012
2013         return 0;
2014 }
2015
2016 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2017 {
2018         struct be_adapter *adapter = netdev_priv(netdev);
2019         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2020         u8 spoofchk;
2021         int status;
2022
2023         if (!sriov_enabled(adapter))
2024                 return -EPERM;
2025
2026         if (vf >= adapter->num_vfs)
2027                 return -EINVAL;
2028
2029         if (BEx_chip(adapter))
2030                 return -EOPNOTSUPP;
2031
2032         if (enable == vf_cfg->spoofchk)
2033                 return 0;
2034
2035         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2036
2037         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2038                                        0, spoofchk);
2039         if (status) {
2040                 dev_err(&adapter->pdev->dev,
2041                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2042                 return be_cmd_status(status);
2043         }
2044
2045         vf_cfg->spoofchk = enable;
2046         return 0;
2047 }
2048
2049 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2050                           ulong now)
2051 {
2052         aic->rx_pkts_prev = rx_pkts;
2053         aic->tx_reqs_prev = tx_pkts;
2054         aic->jiffies = now;
2055 }
2056
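/* Compute a new event-queue delay for adaptive interrupt coalescing.  RX and
 * TX packet counts of all queues on this EQ are summed and turned into a
 * packets-per-second rate over the interval since the last calculation; the
 * resulting delay grows with the rate, is forced to 0 at very low rates and
 * is clamped between aic->min_eqd and aic->max_eqd.
 */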
2057 static int be_get_new_eqd(struct be_eq_obj *eqo)
2058 {
2059         struct be_adapter *adapter = eqo->adapter;
2060         int eqd, start;
2061         struct be_aic_obj *aic;
2062         struct be_rx_obj *rxo;
2063         struct be_tx_obj *txo;
2064         u64 rx_pkts = 0, tx_pkts = 0;
2065         ulong now;
2066         u32 pps, delta;
2067         int i;
2068
2069         aic = &adapter->aic_obj[eqo->idx];
2070         if (!aic->enable) {
2071                 if (aic->jiffies)
2072                         aic->jiffies = 0;
2073                 eqd = aic->et_eqd;
2074                 return eqd;
2075         }
2076
2077         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2078                 do {
2079                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2080                         rx_pkts += rxo->stats.rx_pkts;
2081                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2082         }
2083
2084         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2085                 do {
2086                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2087                         tx_pkts += txo->stats.tx_reqs;
2088                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2089         }
2090
2091         /* Skip if the counters wrapped around, or on the first calculation */
2092         now = jiffies;
2093         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2094             rx_pkts < aic->rx_pkts_prev ||
2095             tx_pkts < aic->tx_reqs_prev) {
2096                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2097                 return aic->prev_eqd;
2098         }
2099
2100         delta = jiffies_to_msecs(now - aic->jiffies);
2101         if (delta == 0)
2102                 return aic->prev_eqd;
2103
2104         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2105                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2106         eqd = (pps / 15000) << 2;
2107
2108         if (eqd < 8)
2109                 eqd = 0;
2110         eqd = min_t(u32, eqd, aic->max_eqd);
2111         eqd = max_t(u32, eqd, aic->min_eqd);
2112
2113         be_aic_update(aic, rx_pkts, tx_pkts, now);
2114
2115         return eqd;
2116 }
2117
2118 /* For Skyhawk-R only */
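/* Map the current adaptive EQ delay onto one of the coarse R2I delay
 * multiplier encodings (R2I_DLY_ENC_0..3) used when re-arming the EQ on
 * Skyhawk-R; the delay itself is recomputed at most once per millisecond.
 */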
2119 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2120 {
2121         struct be_adapter *adapter = eqo->adapter;
2122         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2123         ulong now = jiffies;
2124         int eqd;
2125         u32 mult_enc;
2126
2127         if (!aic->enable)
2128                 return 0;
2129
2130         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2131                 eqd = aic->prev_eqd;
2132         else
2133                 eqd = be_get_new_eqd(eqo);
2134
2135         if (eqd > 100)
2136                 mult_enc = R2I_DLY_ENC_1;
2137         else if (eqd > 60)
2138                 mult_enc = R2I_DLY_ENC_2;
2139         else if (eqd > 20)
2140                 mult_enc = R2I_DLY_ENC_3;
2141         else
2142                 mult_enc = R2I_DLY_ENC_0;
2143
2144         aic->prev_eqd = eqd;
2145
2146         return mult_enc;
2147 }
2148
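/* Recalculate the EQ delay for every event queue and, for those whose value
 * changed (or for all of them when force_update is set), push the new delay
 * multipliers to the adapter via be_cmd_modify_eqd().
 */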
2149 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2150 {
2151         struct be_set_eqd set_eqd[MAX_EVT_QS];
2152         struct be_aic_obj *aic;
2153         struct be_eq_obj *eqo;
2154         int i, num = 0, eqd;
2155
2156         for_all_evt_queues(adapter, eqo, i) {
2157                 aic = &adapter->aic_obj[eqo->idx];
2158                 eqd = be_get_new_eqd(eqo);
2159                 if (force_update || eqd != aic->prev_eqd) {
2160                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2161                         set_eqd[num].eq_id = eqo->q.id;
2162                         aic->prev_eqd = eqd;
2163                         num++;
2164                 }
2165         }
2166
2167         if (num)
2168                 be_cmd_modify_eqd(adapter, set_eqd, num);
2169 }
2170
2171 static void be_rx_stats_update(struct be_rx_obj *rxo,
2172                                struct be_rx_compl_info *rxcp)
2173 {
2174         struct be_rx_stats *stats = rx_stats(rxo);
2175
2176         u64_stats_update_begin(&stats->sync);
2177         stats->rx_compl++;
2178         stats->rx_bytes += rxcp->pkt_size;
2179         stats->rx_pkts++;
2180         if (rxcp->tunneled)
2181                 stats->rx_vxlan_offload_pkts++;
2182         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2183                 stats->rx_mcast_pkts++;
2184         if (rxcp->err)
2185                 stats->rx_compl_err++;
2186         u64_stats_update_end(&stats->sync);
2187 }
2188
2189 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2190 {
2191         /* L4 checksum is not reliable for non TCP/UDP packets.
2192          * Also ignore ipcksm for ipv6 pkts
2193          */
2194         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2195                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2196 }
2197
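/* Pop the page_info entry at the RXQ tail for the fragment just completed.
 * The backing page is DMA-unmapped only when this fragment is the last one
 * carved out of the page; otherwise only the fragment is synced for the CPU.
 */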
2198 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2199 {
2200         struct be_adapter *adapter = rxo->adapter;
2201         struct be_rx_page_info *rx_page_info;
2202         struct be_queue_info *rxq = &rxo->q;
2203         u32 frag_idx = rxq->tail;
2204
2205         rx_page_info = &rxo->page_info_tbl[frag_idx];
2206         BUG_ON(!rx_page_info->page);
2207
2208         if (rx_page_info->last_frag) {
2209                 dma_unmap_page(&adapter->pdev->dev,
2210                                dma_unmap_addr(rx_page_info, bus),
2211                                adapter->big_page_size, DMA_FROM_DEVICE);
2212                 rx_page_info->last_frag = false;
2213         } else {
2214                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2215                                         dma_unmap_addr(rx_page_info, bus),
2216                                         rx_frag_size, DMA_FROM_DEVICE);
2217         }
2218
2219         queue_tail_inc(rxq);
2220         atomic_dec(&rxq->used);
2221         return rx_page_info;
2222 }
2223
2224 /* Throw away the data in the Rx completion */
2225 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2226                                 struct be_rx_compl_info *rxcp)
2227 {
2228         struct be_rx_page_info *page_info;
2229         u16 i, num_rcvd = rxcp->num_rcvd;
2230
2231         for (i = 0; i < num_rcvd; i++) {
2232                 page_info = get_rx_page_info(rxo);
2233                 put_page(page_info->page);
2234                 memset(page_info, 0, sizeof(*page_info));
2235         }
2236 }
2237
2238 /*
2239  * skb_fill_rx_data forms a complete skb for an ether frame
2240  * indicated by rxcp.
2241  */
2242 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2243                              struct be_rx_compl_info *rxcp)
2244 {
2245         struct be_rx_page_info *page_info;
2246         u16 i, j;
2247         u16 hdr_len, curr_frag_len, remaining;
2248         u8 *start;
2249
2250         page_info = get_rx_page_info(rxo);
2251         start = page_address(page_info->page) + page_info->page_offset;
2252         prefetch(start);
2253
2254         /* Copy data in the first descriptor of this completion */
2255         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2256
2257         skb->len = curr_frag_len;
2258         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2259                 memcpy(skb->data, start, curr_frag_len);
2260                 /* Complete packet has now been moved to data */
2261                 put_page(page_info->page);
2262                 skb->data_len = 0;
2263                 skb->tail += curr_frag_len;
2264         } else {
2265                 hdr_len = ETH_HLEN;
2266                 memcpy(skb->data, start, hdr_len);
2267                 skb_shinfo(skb)->nr_frags = 1;
2268                 skb_frag_set_page(skb, 0, page_info->page);
2269                 skb_shinfo(skb)->frags[0].page_offset =
2270                                         page_info->page_offset + hdr_len;
2271                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2272                                   curr_frag_len - hdr_len);
2273                 skb->data_len = curr_frag_len - hdr_len;
2274                 skb->truesize += rx_frag_size;
2275                 skb->tail += hdr_len;
2276         }
2277         page_info->page = NULL;
2278
2279         if (rxcp->pkt_size <= rx_frag_size) {
2280                 BUG_ON(rxcp->num_rcvd != 1);
2281                 return;
2282         }
2283
2284         /* More frags present for this completion */
2285         remaining = rxcp->pkt_size - curr_frag_len;
2286         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2287                 page_info = get_rx_page_info(rxo);
2288                 curr_frag_len = min(remaining, rx_frag_size);
2289
2290                 /* Coalesce all frags from the same physical page in one slot */
2291                 if (page_info->page_offset == 0) {
2292                         /* Fresh page */
2293                         j++;
2294                         skb_frag_set_page(skb, j, page_info->page);
2295                         skb_shinfo(skb)->frags[j].page_offset =
2296                                                         page_info->page_offset;
2297                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2298                         skb_shinfo(skb)->nr_frags++;
2299                 } else {
2300                         put_page(page_info->page);
2301                 }
2302
2303                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2304                 skb->len += curr_frag_len;
2305                 skb->data_len += curr_frag_len;
2306                 skb->truesize += rx_frag_size;
2307                 remaining -= curr_frag_len;
2308                 page_info->page = NULL;
2309         }
2310         BUG_ON(j > MAX_SKB_FRAGS);
2311 }
2312
2313 /* Process the RX completion indicated by rxcp when GRO is disabled */
2314 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2315                                 struct be_rx_compl_info *rxcp)
2316 {
2317         struct be_adapter *adapter = rxo->adapter;
2318         struct net_device *netdev = adapter->netdev;
2319         struct sk_buff *skb;
2320
2321         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2322         if (unlikely(!skb)) {
2323                 rx_stats(rxo)->rx_drops_no_skbs++;
2324                 be_rx_compl_discard(rxo, rxcp);
2325                 return;
2326         }
2327
2328         skb_fill_rx_data(rxo, skb, rxcp);
2329
2330         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2331                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2332         else
2333                 skb_checksum_none_assert(skb);
2334
2335         skb->protocol = eth_type_trans(skb, netdev);
2336         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2337         if (netdev->features & NETIF_F_RXHASH)
2338                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2339
2340         skb->csum_level = rxcp->tunneled;
2341         skb_mark_napi_id(skb, napi);
2342
2343         if (rxcp->vlanf)
2344                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2345
2346         netif_receive_skb(skb);
2347 }
2348
2349 /* Process the RX completion indicated by rxcp when GRO is enabled */
2350 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2351                                     struct napi_struct *napi,
2352                                     struct be_rx_compl_info *rxcp)
2353 {
2354         struct be_adapter *adapter = rxo->adapter;
2355         struct be_rx_page_info *page_info;
2356         struct sk_buff *skb = NULL;
2357         u16 remaining, curr_frag_len;
2358         u16 i, j;
2359
2360         skb = napi_get_frags(napi);
2361         if (!skb) {
2362                 be_rx_compl_discard(rxo, rxcp);
2363                 return;
2364         }
2365
2366         remaining = rxcp->pkt_size;
2367         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2368                 page_info = get_rx_page_info(rxo);
2369
2370                 curr_frag_len = min(remaining, rx_frag_size);
2371
2372                 /* Coalesce all frags from the same physical page in one slot */
2373                 if (i == 0 || page_info->page_offset == 0) {
2374                         /* First frag or Fresh page */
2375                         j++;
2376                         skb_frag_set_page(skb, j, page_info->page);
2377                         skb_shinfo(skb)->frags[j].page_offset =
2378                                                         page_info->page_offset;
2379                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2380                 } else {
2381                         put_page(page_info->page);
2382                 }
2383                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384                 skb->truesize += rx_frag_size;
2385                 remaining -= curr_frag_len;
2386                 memset(page_info, 0, sizeof(*page_info));
2387         }
2388         BUG_ON(j > MAX_SKB_FRAGS);
2389
2390         skb_shinfo(skb)->nr_frags = j + 1;
2391         skb->len = rxcp->pkt_size;
2392         skb->data_len = rxcp->pkt_size;
2393         skb->ip_summed = CHECKSUM_UNNECESSARY;
2394         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2395         if (adapter->netdev->features & NETIF_F_RXHASH)
2396                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2397
2398         skb->csum_level = rxcp->tunneled;
2399
2400         if (rxcp->vlanf)
2401                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2402
2403         napi_gro_frags(napi);
2404 }
2405
2406 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2407                                  struct be_rx_compl_info *rxcp)
2408 {
2409         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2410         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2411         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2412         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2413         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2414         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2415         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2416         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2417         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2418         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2419         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2420         if (rxcp->vlanf) {
2421                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2422                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2423         }
2424         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2425         rxcp->tunneled =
2426                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2427 }
2428
2429 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2430                                  struct be_rx_compl_info *rxcp)
2431 {
2432         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2433         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2434         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2435         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2436         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2437         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2438         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2439         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2440         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2441         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2442         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2443         if (rxcp->vlanf) {
2444                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2445                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2446         }
2447         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2448         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2449 }
2450
2451 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2452 {
2453         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2454         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2455         struct be_adapter *adapter = rxo->adapter;
2456
2457         /* For checking the valid bit it is OK to use either definition, as the
2458          * valid bit is at the same position in both v0 and v1 Rx compl */
2459         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2460                 return NULL;
2461
2462         rmb();
2463         be_dws_le_to_cpu(compl, sizeof(*compl));
2464
2465         if (adapter->be3_native)
2466                 be_parse_rx_compl_v1(compl, rxcp);
2467         else
2468                 be_parse_rx_compl_v0(compl, rxcp);
2469
2470         if (rxcp->ip_frag)
2471                 rxcp->l4_csum = 0;
2472
2473         if (rxcp->vlanf) {
2474                 /* In QNQ modes, if qnq bit is not set, then the packet was
2475                  * tagged only with the transparent outer vlan-tag and must
2476                  * not be treated as a vlan packet by host
2477                  */
2478                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2479                         rxcp->vlanf = 0;
2480
2481                 if (!lancer_chip(adapter))
2482                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2483
2484                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2485                     !test_bit(rxcp->vlan_tag, adapter->vids))
2486                         rxcp->vlanf = 0;
2487         }
2488
2489         /* As the compl has been parsed, reset it; we won't touch it again */
2490         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2491
2492         queue_tail_inc(&rxo->cq);
2493         return rxcp;
2494 }
2495
2496 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2497 {
2498         u32 order = get_order(size);
2499
2500         if (order > 0)
2501                 gfp |= __GFP_COMP;
2502         return  alloc_pages(gfp, order);
2503 }
2504
2505 /*
2506  * Allocate a page, split it to fragments of size rx_frag_size and post as
2507  * receive buffers to BE
2508  */
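/* Each big page is DMA-mapped once and carved into rx_frag_size fragments;
 * only the page_info holding the last fragment of a page stores the
 * page-level DMA address (last_frag), so the page is unmapped exactly once
 * when that fragment is reaped.
 */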
2509 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2510 {
2511         struct be_adapter *adapter = rxo->adapter;
2512         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2513         struct be_queue_info *rxq = &rxo->q;
2514         struct page *pagep = NULL;
2515         struct device *dev = &adapter->pdev->dev;
2516         struct be_eth_rx_d *rxd;
2517         u64 page_dmaaddr = 0, frag_dmaaddr;
2518         u32 posted, page_offset = 0, notify = 0;
2519
2520         page_info = &rxo->page_info_tbl[rxq->head];
2521         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2522                 if (!pagep) {
2523                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2524                         if (unlikely(!pagep)) {
2525                                 rx_stats(rxo)->rx_post_fail++;
2526                                 break;
2527                         }
2528                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2529                                                     adapter->big_page_size,
2530                                                     DMA_FROM_DEVICE);
2531                         if (dma_mapping_error(dev, page_dmaaddr)) {
2532                                 put_page(pagep);
2533                                 pagep = NULL;
2534                                 adapter->drv_stats.dma_map_errors++;
2535                                 break;
2536                         }
2537                         page_offset = 0;
2538                 } else {
2539                         get_page(pagep);
2540                         page_offset += rx_frag_size;
2541                 }
2542                 page_info->page_offset = page_offset;
2543                 page_info->page = pagep;
2544
2545                 rxd = queue_head_node(rxq);
2546                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2547                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2548                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2549
2550                 /* Any space left in the current big page for another frag? */
2551                 if ((page_offset + rx_frag_size + rx_frag_size) >
2552                                         adapter->big_page_size) {
2553                         pagep = NULL;
2554                         page_info->last_frag = true;
2555                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2556                 } else {
2557                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2558                 }
2559
2560                 prev_page_info = page_info;
2561                 queue_head_inc(rxq);
2562                 page_info = &rxo->page_info_tbl[rxq->head];
2563         }
2564
2565         /* Mark the last frag of a page when we break out of the above loop
2566          * with no more slots available in the RXQ
2567          */
2568         if (pagep) {
2569                 prev_page_info->last_frag = true;
2570                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2571         }
2572
2573         if (posted) {
2574                 atomic_add(posted, &rxq->used);
2575                 if (rxo->rx_post_starved)
2576                         rxo->rx_post_starved = false;
2577                 do {
2578                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2579                         be_rxq_notify(adapter, rxq->id, notify);
2580                         posted -= notify;
2581                 } while (posted);
2582         } else if (atomic_read(&rxq->used) == 0) {
2583                 /* Let be_worker replenish when memory is available */
2584                 rxo->rx_post_starved = true;
2585         }
2586 }
2587
2588 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2589 {
2590         struct be_queue_info *tx_cq = &txo->cq;
2591         struct be_tx_compl_info *txcp = &txo->txcp;
2592         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2593
2594         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2595                 return NULL;
2596
2597         /* Ensure load ordering of valid bit dword and other dwords below */
2598         rmb();
2599         be_dws_le_to_cpu(compl, sizeof(*compl));
2600
2601         txcp->status = GET_TX_COMPL_BITS(status, compl);
2602         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2603
2604         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2605         queue_tail_inc(tx_cq);
2606         return txcp;
2607 }
2608
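/* Walk the TX ring from its tail up to and including last_index (taken from
 * the completion), unmapping each WRB and freeing the completed skbs.
 * Returns the number of WRBs consumed so the caller can adjust txq->used.
 */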
2609 static u16 be_tx_compl_process(struct be_adapter *adapter,
2610                                struct be_tx_obj *txo, u16 last_index)
2611 {
2612         struct sk_buff **sent_skbs = txo->sent_skb_list;
2613         struct be_queue_info *txq = &txo->q;
2614         struct sk_buff *skb = NULL;
2615         bool unmap_skb_hdr = false;
2616         struct be_eth_wrb *wrb;
2617         u16 num_wrbs = 0;
2618         u32 frag_index;
2619
2620         do {
2621                 if (sent_skbs[txq->tail]) {
2622                         /* Free skb from prev req */
2623                         if (skb)
2624                                 dev_consume_skb_any(skb);
2625                         skb = sent_skbs[txq->tail];
2626                         sent_skbs[txq->tail] = NULL;
2627                         queue_tail_inc(txq);  /* skip hdr wrb */
2628                         num_wrbs++;
2629                         unmap_skb_hdr = true;
2630                 }
2631                 wrb = queue_tail_node(txq);
2632                 frag_index = txq->tail;
2633                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2634                               (unmap_skb_hdr && skb_headlen(skb)));
2635                 unmap_skb_hdr = false;
2636                 queue_tail_inc(txq);
2637                 num_wrbs++;
2638         } while (frag_index != last_index);
2639         dev_consume_skb_any(skb);
2640
2641         return num_wrbs;
2642 }
2643
2644 /* Return the number of events in the event queue */
2645 static inline int events_get(struct be_eq_obj *eqo)
2646 {
2647         struct be_eq_entry *eqe;
2648         int num = 0;
2649
2650         do {
2651                 eqe = queue_tail_node(&eqo->q);
2652                 if (eqe->evt == 0)
2653                         break;
2654
2655                 rmb();
2656                 eqe->evt = 0;
2657                 num++;
2658                 queue_tail_inc(&eqo->q);
2659         } while (true);
2660
2661         return num;
2662 }
2663
2664 /* Leaves the EQ in disarmed state */
2665 static void be_eq_clean(struct be_eq_obj *eqo)
2666 {
2667         int num = events_get(eqo);
2668
2669         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2670 }
2671
2672 /* Free posted rx buffers that were not used */
2673 static void be_rxq_clean(struct be_rx_obj *rxo)
2674 {
2675         struct be_queue_info *rxq = &rxo->q;
2676         struct be_rx_page_info *page_info;
2677
2678         while (atomic_read(&rxq->used) > 0) {
2679                 page_info = get_rx_page_info(rxo);
2680                 put_page(page_info->page);
2681                 memset(page_info, 0, sizeof(*page_info));
2682         }
2683         BUG_ON(atomic_read(&rxq->used));
2684         rxq->tail = 0;
2685         rxq->head = 0;
2686 }
2687
2688 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2689 {
2690         struct be_queue_info *rx_cq = &rxo->cq;
2691         struct be_rx_compl_info *rxcp;
2692         struct be_adapter *adapter = rxo->adapter;
2693         int flush_wait = 0;
2694
2695         /* Consume pending rx completions.
2696          * Wait for the flush completion (identified by zero num_rcvd)
2697          * to arrive. Notify CQ even when there are no more CQ entries
2698          * for HW to flush partially coalesced CQ entries.
2699          * In Lancer, there is no need to wait for flush compl.
2700          */
2701         for (;;) {
2702                 rxcp = be_rx_compl_get(rxo);
2703                 if (!rxcp) {
2704                         if (lancer_chip(adapter))
2705                                 break;
2706
2707                         if (flush_wait++ > 50 ||
2708                             be_check_error(adapter,
2709                                            BE_ERROR_HW)) {
2710                                 dev_warn(&adapter->pdev->dev,
2711                                          "did not receive flush compl\n");
2712                                 break;
2713                         }
2714                         be_cq_notify(adapter, rx_cq->id, true, 0);
2715                         mdelay(1);
2716                 } else {
2717                         be_rx_compl_discard(rxo, rxcp);
2718                         be_cq_notify(adapter, rx_cq->id, false, 1);
2719                         if (rxcp->num_rcvd == 0)
2720                                 break;
2721                 }
2722         }
2723
2724         /* After cleanup, leave the CQ in unarmed state */
2725         be_cq_notify(adapter, rx_cq->id, false, 0);
2726 }
2727
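/* Drain outstanding TX completions on all queues until the hardware has been
 * quiet for ~10ms, then clean up any WRBs that were enqueued but never
 * notified to the hardware: free their skbs/mappings via the normal
 * completion path and rewind the queue indices.
 */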
2728 static void be_tx_compl_clean(struct be_adapter *adapter)
2729 {
2730         struct device *dev = &adapter->pdev->dev;
2731         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2732         struct be_tx_compl_info *txcp;
2733         struct be_queue_info *txq;
2734         u32 end_idx, notified_idx;
2735         struct be_tx_obj *txo;
2736         int i, pending_txqs;
2737
2738         /* Stop polling for compls when HW has been silent for 10ms */
2739         do {
2740                 pending_txqs = adapter->num_tx_qs;
2741
2742                 for_all_tx_queues(adapter, txo, i) {
2743                         cmpl = 0;
2744                         num_wrbs = 0;
2745                         txq = &txo->q;
2746                         while ((txcp = be_tx_compl_get(txo))) {
2747                                 num_wrbs +=
2748                                         be_tx_compl_process(adapter, txo,
2749                                                             txcp->end_index);
2750                                 cmpl++;
2751                         }
2752                         if (cmpl) {
2753                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2754                                 atomic_sub(num_wrbs, &txq->used);
2755                                 timeo = 0;
2756                         }
2757                         if (!be_is_tx_compl_pending(txo))
2758                                 pending_txqs--;
2759                 }
2760
2761                 if (pending_txqs == 0 || ++timeo > 10 ||
2762                     be_check_error(adapter, BE_ERROR_HW))
2763                         break;
2764
2765                 mdelay(1);
2766         } while (true);
2767
2768         /* Free enqueued TX that was never notified to HW */
2769         for_all_tx_queues(adapter, txo, i) {
2770                 txq = &txo->q;
2771
2772                 if (atomic_read(&txq->used)) {
2773                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2774                                  i, atomic_read(&txq->used));
2775                         notified_idx = txq->tail;
2776                         end_idx = txq->tail;
2777                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2778                                   txq->len);
2779                         /* Use the tx-compl process logic to handle requests
2780                          * that were not sent to the HW.
2781                          */
2782                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2783                         atomic_sub(num_wrbs, &txq->used);
2784                         BUG_ON(atomic_read(&txq->used));
2785                         txo->pend_wrb_cnt = 0;
2786                         /* Since hw was never notified of these requests,
2787                          * reset TXQ indices
2788                          */
2789                         txq->head = notified_idx;
2790                         txq->tail = notified_idx;
2791                 }
2792         }
2793 }
2794
2795 static void be_evt_queues_destroy(struct be_adapter *adapter)
2796 {
2797         struct be_eq_obj *eqo;
2798         int i;
2799
2800         for_all_evt_queues(adapter, eqo, i) {
2801                 if (eqo->q.created) {
2802                         be_eq_clean(eqo);
2803                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2804                         netif_napi_del(&eqo->napi);
2805                         free_cpumask_var(eqo->affinity_mask);
2806                 }
2807                 be_queue_free(adapter, &eqo->q);
2808         }
2809 }
2810
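/* Create one event queue per interrupt vector needed to cover both the RX
 * and TX queues.  Each EQ gets adaptive coalescing enabled by default, a
 * NUMA-local CPU recorded in its affinity mask and its own NAPI context.
 */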
2811 static int be_evt_queues_create(struct be_adapter *adapter)
2812 {
2813         struct be_queue_info *eq;
2814         struct be_eq_obj *eqo;
2815         struct be_aic_obj *aic;
2816         int i, rc;
2817
2818         /* need enough EQs to service both RX and TX queues */
2819         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2820                                     max(adapter->cfg_num_rx_irqs,
2821                                         adapter->cfg_num_tx_irqs));
2822
2823         for_all_evt_queues(adapter, eqo, i) {
2824                 int numa_node = dev_to_node(&adapter->pdev->dev);
2825
2826                 aic = &adapter->aic_obj[i];
2827                 eqo->adapter = adapter;
2828                 eqo->idx = i;
2829                 aic->max_eqd = BE_MAX_EQD;
2830                 aic->enable = true;
2831
2832                 eq = &eqo->q;
2833                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2834                                     sizeof(struct be_eq_entry));
2835                 if (rc)
2836                         return rc;
2837
2838                 rc = be_cmd_eq_create(adapter, eqo);
2839                 if (rc)
2840                         return rc;
2841
2842                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2843                         return -ENOMEM;
2844                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2845                                 eqo->affinity_mask);
2846                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2847                                BE_NAPI_WEIGHT);
2848         }
2849         return 0;
2850 }
2851
2852 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2853 {
2854         struct be_queue_info *q;
2855
2856         q = &adapter->mcc_obj.q;
2857         if (q->created)
2858                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2859         be_queue_free(adapter, q);
2860
2861         q = &adapter->mcc_obj.cq;
2862         if (q->created)
2863                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2864         be_queue_free(adapter, q);
2865 }
2866
2867 /* Must be called only after TX qs are created as MCC shares TX EQ */
2868 static int be_mcc_queues_create(struct be_adapter *adapter)
2869 {
2870         struct be_queue_info *q, *cq;
2871
2872         cq = &adapter->mcc_obj.cq;
2873         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2874                            sizeof(struct be_mcc_compl)))
2875                 goto err;
2876
2877         /* Use the default EQ for MCC completions */
2878         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2879                 goto mcc_cq_free;
2880
2881         q = &adapter->mcc_obj.q;
2882         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2883                 goto mcc_cq_destroy;
2884
2885         if (be_cmd_mccq_create(adapter, q, cq))
2886                 goto mcc_q_free;
2887
2888         return 0;
2889
2890 mcc_q_free:
2891         be_queue_free(adapter, q);
2892 mcc_cq_destroy:
2893         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2894 mcc_cq_free:
2895         be_queue_free(adapter, cq);
2896 err:
2897         return -1;
2898 }
2899
2900 static void be_tx_queues_destroy(struct be_adapter *adapter)
2901 {
2902         struct be_queue_info *q;
2903         struct be_tx_obj *txo;
2904         u8 i;
2905
2906         for_all_tx_queues(adapter, txo, i) {
2907                 q = &txo->q;
2908                 if (q->created)
2909                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2910                 be_queue_free(adapter, q);
2911
2912                 q = &txo->cq;
2913                 if (q->created)
2914                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2915                 be_queue_free(adapter, q);
2916         }
2917 }
2918
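/* Create the TX completion queues and TX rings. The TX queue count is
 * bounded by the EQ count and the configured TX IRQs; TX CQs are bound
 * to EQs in round-robin order, so several TXQs may share one EQ. Each
 * TXQ's transmit CPU mapping (XPS) is taken from the affinity mask of
 * the EQ it is bound to.
 */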
2919 static int be_tx_qs_create(struct be_adapter *adapter)
2920 {
2921         struct be_queue_info *cq;
2922         struct be_tx_obj *txo;
2923         struct be_eq_obj *eqo;
2924         int status, i;
2925
2926         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2927
2928         for_all_tx_queues(adapter, txo, i) {
2929                 cq = &txo->cq;
2930                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2931                                         sizeof(struct be_eth_tx_compl));
2932                 if (status)
2933                         return status;
2934
2935                 u64_stats_init(&txo->stats.sync);
2936                 u64_stats_init(&txo->stats.sync_compl);
2937
2938                 /* If num_evt_qs is less than num_tx_qs, then more than
2939                  * one txq shares an eq
2940                  */
2941                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2942                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2943                 if (status)
2944                         return status;
2945
2946                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2947                                         sizeof(struct be_eth_wrb));
2948                 if (status)
2949                         return status;
2950
2951                 status = be_cmd_txq_create(adapter, txo);
2952                 if (status)
2953                         return status;
2954
2955                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2956                                     eqo->idx);
2957         }
2958
2959         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2960                  adapter->num_tx_qs);
2961         return 0;
2962 }
2963
2964 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2965 {
2966         struct be_queue_info *q;
2967         struct be_rx_obj *rxo;
2968         int i;
2969
2970         for_all_rx_queues(adapter, rxo, i) {
2971                 q = &rxo->cq;
2972                 if (q->created)
2973                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2974                 be_queue_free(adapter, q);
2975         }
2976 }
2977
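/* Size the RX queue set and create the RX completion queues. RSS rings
 * are used only when at least two are available; otherwise the adapter
 * falls back to a single (default) RXQ. RX CQs are spread across the
 * event queues in round-robin order.
 */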
2978 static int be_rx_cqs_create(struct be_adapter *adapter)
2979 {
2980         struct be_queue_info *eq, *cq;
2981         struct be_rx_obj *rxo;
2982         int rc, i;
2983
2984         adapter->num_rss_qs =
2985                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2986
2987         /* We'll use RSS only if at least 2 RSS rings are supported. */
2988         if (adapter->num_rss_qs < 2)
2989                 adapter->num_rss_qs = 0;
2990
2991         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2992
2993         /* When the interface is not capable of RSS rings (and there is no
2994          * need to create a default RXQ) we'll still need one RXQ
2995          */
2996         if (adapter->num_rx_qs == 0)
2997                 adapter->num_rx_qs = 1;
2998
2999         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3000         for_all_rx_queues(adapter, rxo, i) {
3001                 rxo->adapter = adapter;
3002                 cq = &rxo->cq;
3003                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3004                                     sizeof(struct be_eth_rx_compl));
3005                 if (rc)
3006                         return rc;
3007
3008                 u64_stats_init(&rxo->stats.sync);
3009                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3010                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3011                 if (rc)
3012                         return rc;
3013         }
3014
3015         dev_info(&adapter->pdev->dev,
3016                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3017         return 0;
3018 }
3019
3020 static irqreturn_t be_intx(int irq, void *dev)
3021 {
3022         struct be_eq_obj *eqo = dev;
3023         struct be_adapter *adapter = eqo->adapter;
3024         int num_evts = 0;
3025
3026         /* IRQ is not expected when NAPI is scheduled as the EQ
3027          * will not be armed.
3028          * But, this can happen on Lancer INTx where it takes
3029          * a while to de-assert INTx or in BE2 where occasionally
3030          * an interrupt may be raised even when EQ is unarmed.
3031          * If NAPI is already scheduled, then counting & notifying
3032          * events will orphan them.
3033          */
3034         if (napi_schedule_prep(&eqo->napi)) {
3035                 num_evts = events_get(eqo);
3036                 __napi_schedule(&eqo->napi);
3037                 if (num_evts)
3038                         eqo->spurious_intr = 0;
3039         }
3040         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3041
3042         /* Return IRQ_HANDLED only for the first spurious intr
3043          * after a valid intr to stop the kernel from branding
3044          * this irq as a bad one!
3045          */
3046         if (num_evts || eqo->spurious_intr++ == 0)
3047                 return IRQ_HANDLED;
3048         else
3049                 return IRQ_NONE;
3050 }
3051
3052 static irqreturn_t be_msix(int irq, void *dev)
3053 {
3054         struct be_eq_obj *eqo = dev;
3055
3056         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3057         napi_schedule(&eqo->napi);
3058         return IRQ_HANDLED;
3059 }
3060
3061 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3062 {
3063         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3064 }
3065
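/* NAPI RX processing for one RX object: pull completions from the RX CQ
 * up to the given budget, skip flush completions and discard partial or
 * mis-directed ones, and hand good frames to GRO or the regular receive
 * path. The CQ is then re-notified with the amount of work done and,
 * unless the queue is in the post_starved state, the RX ring is
 * replenished when it falls below the refill watermark.
 */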
3066 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3067                          int budget)
3068 {
3069         struct be_adapter *adapter = rxo->adapter;
3070         struct be_queue_info *rx_cq = &rxo->cq;
3071         struct be_rx_compl_info *rxcp;
3072         u32 work_done;
3073         u32 frags_consumed = 0;
3074
3075         for (work_done = 0; work_done < budget; work_done++) {
3076                 rxcp = be_rx_compl_get(rxo);
3077                 if (!rxcp)
3078                         break;
3079
3080                 /* Is it a flush compl that has no data */
3081                 if (unlikely(rxcp->num_rcvd == 0))
3082                         goto loop_continue;
3083
3084                 /* Discard compl with partial DMA Lancer B0 */
3085                 if (unlikely(!rxcp->pkt_size)) {
3086                         be_rx_compl_discard(rxo, rxcp);
3087                         goto loop_continue;
3088                 }
3089
3090                 /* On BE drop pkts that arrive due to imperfect filtering in
3091                  * promiscuous mode on some SKUs
3092                  */
3093                 if (unlikely(rxcp->port != adapter->port_num &&
3094                              !lancer_chip(adapter))) {
3095                         be_rx_compl_discard(rxo, rxcp);
3096                         goto loop_continue;
3097                 }
3098
3099                 if (do_gro(rxcp))
3100                         be_rx_compl_process_gro(rxo, napi, rxcp);
3101                 else
3102                         be_rx_compl_process(rxo, napi, rxcp);
3103
3104 loop_continue:
3105                 frags_consumed += rxcp->num_rcvd;
3106                 be_rx_stats_update(rxo, rxcp);
3107         }
3108
3109         if (work_done) {
3110                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3111
3112                 /* When an rx-obj gets into post_starved state, just
3113                  * let be_worker do the posting.
3114                  */
3115                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3116                     !rxo->rx_post_starved)
3117                         be_post_rx_frags(rxo, GFP_ATOMIC,
3118                                          max_t(u32, MAX_RX_POST,
3119                                                frags_consumed));
3120         }
3121
3122         return work_done;
3123 }
3124
3125 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3126 {
3127         switch (status) {
3128         case BE_TX_COMP_HDR_PARSE_ERR:
3129                 tx_stats(txo)->tx_hdr_parse_err++;
3130                 break;
3131         case BE_TX_COMP_NDMA_ERR:
3132                 tx_stats(txo)->tx_dma_err++;
3133                 break;
3134         case BE_TX_COMP_ACL_ERR:
3135                 tx_stats(txo)->tx_spoof_check_err++;
3136                 break;
3137         }
3138 }
3139
3140 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3141 {
3142         switch (status) {
3143         case LANCER_TX_COMP_LSO_ERR:
3144                 tx_stats(txo)->tx_tso_err++;
3145                 break;
3146         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3147         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3148                 tx_stats(txo)->tx_spoof_check_err++;
3149                 break;
3150         case LANCER_TX_COMP_QINQ_ERR:
3151                 tx_stats(txo)->tx_qinq_err++;
3152                 break;
3153         case LANCER_TX_COMP_PARITY_ERR:
3154                 tx_stats(txo)->tx_internal_parity_err++;
3155                 break;
3156         case LANCER_TX_COMP_DMA_ERR:
3157                 tx_stats(txo)->tx_dma_err++;
3158                 break;
3159         }
3160 }
3161
3162 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3163                           int idx)
3164 {
3165         int num_wrbs = 0, work_done = 0;
3166         struct be_tx_compl_info *txcp;
3167
3168         while ((txcp = be_tx_compl_get(txo))) {
3169                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3170                 work_done++;
3171
3172                 if (txcp->status) {
3173                         if (lancer_chip(adapter))
3174                                 lancer_update_tx_err(txo, txcp->status);
3175                         else
3176                                 be_update_tx_err(txo, txcp->status);
3177                 }
3178         }
3179
3180         if (work_done) {
3181                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3182                 atomic_sub(num_wrbs, &txo->q.used);
3183
3184                 /* As Tx wrbs have been freed up, wake up netdev queue
3185                  * if it was stopped due to lack of tx wrbs.  */
3186                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3187                     be_can_txq_wake(txo)) {
3188                         netif_wake_subqueue(adapter->netdev, idx);
3189                 }
3190
3191                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3192                 tx_stats(txo)->tx_compl += work_done;
3193                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3194         }
3195 }
3196
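/* NAPI poll handler shared by all queues bound to this EQ: drain the
 * event queue, reap TX completions for every TXQ on the EQ, process RX
 * completions for every RXQ on the EQ (the EQ hosting the MCC CQ also
 * processes MCC completions), and either complete NAPI and re-arm the EQ
 * (with an interrupt-delay multiplier on Skyhawk) when under budget, or
 * simply ack the events and stay in polling mode.
 */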
3197 int be_poll(struct napi_struct *napi, int budget)
3198 {
3199         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3200         struct be_adapter *adapter = eqo->adapter;
3201         int max_work = 0, work, i, num_evts;
3202         struct be_rx_obj *rxo;
3203         struct be_tx_obj *txo;
3204         u32 mult_enc = 0;
3205
3206         num_evts = events_get(eqo);
3207
3208         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3209                 be_process_tx(adapter, txo, i);
3210
3211         /* This loop will iterate twice for EQ0 in which
3212          * completions of the last RXQ (the default one) are also processed.
3213          * For other EQs the loop iterates only once.
3214          */
3215         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3216                 work = be_process_rx(rxo, napi, budget);
3217                 max_work = max(work, max_work);
3218         }
3219
3220         if (is_mcc_eqo(eqo))
3221                 be_process_mcc(adapter);
3222
3223         if (max_work < budget) {
3224                 napi_complete_done(napi, max_work);
3225
3226                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3227                  * delay via a delay multiplier encoding value
3228                  */
3229                 if (skyhawk_chip(adapter))
3230                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3231
3232                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3233                              mult_enc);
3234         } else {
3235                 /* As we'll continue in polling mode, count and clear events */
3236                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3237         }
3238         return max_work;
3239 }
3240
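/* Probe the adapter for unrecoverable errors. On Lancer this reads the
 * SLIPORT status/error registers (a firmware-reset signature is reported
 * as an informational message only); on other chips it reads the UE
 * status registers, applies the UE masks, and on BE3 cross-checks the
 * POST stage to filter out spurious UE indications before flagging
 * BE_ERROR_UE and logging the offending UE bits.
 */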
3241 void be_detect_error(struct be_adapter *adapter)
3242 {
3243         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3244         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3245         struct device *dev = &adapter->pdev->dev;
3246         u16 val;
3247         u32 i;
3248
3249         if (be_check_error(adapter, BE_ERROR_HW))
3250                 return;
3251
3252         if (lancer_chip(adapter)) {
3253                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3254                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3255                         be_set_error(adapter, BE_ERROR_UE);
3256                         sliport_err1 = ioread32(adapter->db +
3257                                                 SLIPORT_ERROR1_OFFSET);
3258                         sliport_err2 = ioread32(adapter->db +
3259                                                 SLIPORT_ERROR2_OFFSET);
3260                         /* Do not log error messages if it's a FW reset */
3261                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3262                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3263                                 dev_info(dev, "Firmware update in progress\n");
3264                         } else {
3265                                 dev_err(dev, "Error detected in the card\n");
3266                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3267                                         sliport_status);
3268                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3269                                         sliport_err1);
3270                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3271                                         sliport_err2);
3272                         }
3273                 }
3274         } else {
3275                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3276                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3277                 ue_lo_mask = ioread32(adapter->pcicfg +
3278                                       PCICFG_UE_STATUS_LOW_MASK);
3279                 ue_hi_mask = ioread32(adapter->pcicfg +
3280                                       PCICFG_UE_STATUS_HI_MASK);
3281
3282                 ue_lo = (ue_lo & ~ue_lo_mask);
3283                 ue_hi = (ue_hi & ~ue_hi_mask);
3284
3285                 if (ue_lo || ue_hi) {
3286                         /* On certain platforms BE3 hardware can indicate
3287                          * spurious UEs. In case of a UE in the chip,
3288                          * the POST register correctly reports either a
3289                          * FAT_LOG_START state (FW is currently dumping
3290                          * FAT log data) or an ARMFW_UE state. Check for the
3291                          * above states to ascertain if the UE is valid or not.
3292                          */
3293                         if (BE3_chip(adapter)) {
3294                                 val = be_POST_stage_get(adapter);
3295                                 if ((val & POST_STAGE_FAT_LOG_START)
3296                                      != POST_STAGE_FAT_LOG_START &&
3297                                     (val & POST_STAGE_ARMFW_UE)
3298                                      != POST_STAGE_ARMFW_UE &&
3299                                     (val & POST_STAGE_RECOVERABLE_ERR)
3300                                      != POST_STAGE_RECOVERABLE_ERR)
3301                                         return;
3302                         }
3303
3304                         dev_err(dev, "Error detected in the adapter");
3305                         be_set_error(adapter, BE_ERROR_UE);
3306
3307                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3308                                 if (ue_lo & 1)
3309                                         dev_err(dev, "UE: %s bit set\n",
3310                                                 ue_status_low_desc[i]);
3311                         }
3312                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3313                                 if (ue_hi & 1)
3314                                         dev_err(dev, "UE: %s bit set\n",
3315                                                 ue_status_hi_desc[i]);
3316                         }
3317                 }
3318         }
3319 }
3320
3321 static void be_msix_disable(struct be_adapter *adapter)
3322 {
3323         if (msix_enabled(adapter)) {
3324                 pci_disable_msix(adapter->pdev);
3325                 adapter->num_msix_vec = 0;
3326                 adapter->num_msix_roce_vec = 0;
3327         }
3328 }
3329
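/* Enable MSI-X. RoCE-capable functions request enough vectors for both
 * the NIC EQs and the RoCE EQs (the RoCE share is capped at the number
 * of online CPUs); NIC-only functions request max(rx_irqs, tx_irqs).
 * If more than the minimum is granted on a RoCE-capable function, half
 * of the vectors are reserved for RoCE. A failure is fatal only for VFs,
 * which cannot fall back to INTx.
 */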
3330 static int be_msix_enable(struct be_adapter *adapter)
3331 {
3332         unsigned int i, max_roce_eqs;
3333         struct device *dev = &adapter->pdev->dev;
3334         int num_vec;
3335
3336         /* If RoCE is supported, program the max number of vectors that
3337          * could be used for NIC and RoCE; otherwise, just program the number
3338          * we'll use initially.
3339          */
3340         if (be_roce_supported(adapter)) {
3341                 max_roce_eqs =
3342                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3343                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3344                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3345         } else {
3346                 num_vec = max(adapter->cfg_num_rx_irqs,
3347                               adapter->cfg_num_tx_irqs);
3348         }
3349
3350         for (i = 0; i < num_vec; i++)
3351                 adapter->msix_entries[i].entry = i;
3352
3353         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3354                                         MIN_MSIX_VECTORS, num_vec);
3355         if (num_vec < 0)
3356                 goto fail;
3357
3358         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3359                 adapter->num_msix_roce_vec = num_vec / 2;
3360                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3361                          adapter->num_msix_roce_vec);
3362         }
3363
3364         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3365
3366         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3367                  adapter->num_msix_vec);
3368         return 0;
3369
3370 fail:
3371         dev_warn(dev, "MSIx enable failed\n");
3372
3373         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3374         if (be_virtfn(adapter))
3375                 return num_vec;
3376         return 0;
3377 }
3378
3379 static inline int be_msix_vec_get(struct be_adapter *adapter,
3380                                   struct be_eq_obj *eqo)
3381 {
3382         return adapter->msix_entries[eqo->msix_idx].vector;
3383 }
3384
3385 static int be_msix_register(struct be_adapter *adapter)
3386 {
3387         struct net_device *netdev = adapter->netdev;
3388         struct be_eq_obj *eqo;
3389         int status, i, vec;
3390
3391         for_all_evt_queues(adapter, eqo, i) {
3392                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3393                 vec = be_msix_vec_get(adapter, eqo);
3394                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3395                 if (status)
3396                         goto err_msix;
3397
3398                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3399         }
3400
3401         return 0;
3402 err_msix:
3403         for (i--; i >= 0; i--) {
3404                 eqo = &adapter->eq_obj[i];
3405                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3406         }
3407         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3408                  status);
3409         be_msix_disable(adapter);
3410         return status;
3411 }
3412
3413 static int be_irq_register(struct be_adapter *adapter)
3414 {
3415         struct net_device *netdev = adapter->netdev;
3416         int status;
3417
3418         if (msix_enabled(adapter)) {
3419                 status = be_msix_register(adapter);
3420                 if (status == 0)
3421                         goto done;
3422                 /* INTx is not supported for VF */
3423                 if (be_virtfn(adapter))
3424                         return status;
3425         }
3426
3427         /* INTx: only the first EQ is used */
3428         netdev->irq = adapter->pdev->irq;
3429         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3430                              &adapter->eq_obj[0]);
3431         if (status) {
3432                 dev_err(&adapter->pdev->dev,
3433                         "INTx request IRQ failed - err %d\n", status);
3434                 return status;
3435         }
3436 done:
3437         adapter->isr_registered = true;
3438         return 0;
3439 }
3440
3441 static void be_irq_unregister(struct be_adapter *adapter)
3442 {
3443         struct net_device *netdev = adapter->netdev;
3444         struct be_eq_obj *eqo;
3445         int i, vec;
3446
3447         if (!adapter->isr_registered)
3448                 return;
3449
3450         /* INTx */
3451         if (!msix_enabled(adapter)) {
3452                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3453                 goto done;
3454         }
3455
3456         /* MSIx */
3457         for_all_evt_queues(adapter, eqo, i) {
3458                 vec = be_msix_vec_get(adapter, eqo);
3459                 irq_set_affinity_hint(vec, NULL);
3460                 free_irq(vec, eqo);
3461         }
3462
3463 done:
3464         adapter->isr_registered = false;
3465 }
3466
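/* Destroy the RX rings. On Lancer, an RXQ destroyed while in an
 * "out of buffer" state can stall the hardware, so the CQ is drained
 * and buffers are re-posted (if the ring is empty) before the destroy
 * command is issued. After all rings are freed, RSS is turned off in FW
 * if it was enabled.
 */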
3467 static void be_rx_qs_destroy(struct be_adapter *adapter)
3468 {
3469         struct rss_info *rss = &adapter->rss_info;
3470         struct be_queue_info *q;
3471         struct be_rx_obj *rxo;
3472         int i;
3473
3474         for_all_rx_queues(adapter, rxo, i) {
3475                 q = &rxo->q;
3476                 if (q->created) {
3477                         /* If RXQs are destroyed while in an "out of buffer"
3478                          * state, there is a possibility of an HW stall on
3479                          * Lancer. So, post 64 buffers to each queue to relieve
3480                          * the "out of buffer" condition.
3481                          * Make sure there's space in the RXQ before posting.
3482                          */
3483                         if (lancer_chip(adapter)) {
3484                                 be_rx_cq_clean(rxo);
3485                                 if (atomic_read(&q->used) == 0)
3486                                         be_post_rx_frags(rxo, GFP_KERNEL,
3487                                                          MAX_RX_POST);
3488                         }
3489
3490                         be_cmd_rxq_destroy(adapter, q);
3491                         be_rx_cq_clean(rxo);
3492                         be_rxq_clean(rxo);
3493                 }
3494                 be_queue_free(adapter, q);
3495         }
3496
3497         if (rss->rss_flags) {
3498                 rss->rss_flags = RSS_ENABLE_NONE;
3499                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3500                                   128, rss->rss_hkey);
3501         }
3502 }
3503
3504 static void be_disable_if_filters(struct be_adapter *adapter)
3505 {
3506         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3507         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3508             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3509                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3510                 eth_zero_addr(adapter->dev_mac);
3511         }
3512
3513         be_clear_uc_list(adapter);
3514         be_clear_mc_list(adapter);
3515
3516         /* The IFACE flags are enabled in the open path and cleared
3517          * in the close path. When a VF gets detached from the host and
3518          * assigned to a VM the following happens:
3519          *      - VF's IFACE flags get cleared in the detach path
3520          *      - IFACE create is issued by the VF in the attach path
3521          * Due to a bug in the BE3/Skyhawk-R FW
3522          * (Lancer FW doesn't have the bug), the IFACE capability flags
3523          * specified along with the IFACE create cmd issued by a VF are not
3524          * honoured by FW.  As a consequence, if a *new* driver
3525          * (that enables/disables IFACE flags in open/close)
3526          * is loaded in the host and an *old* driver is used by a VM/VF,
3527          * the IFACE gets created *without* the needed flags.
3528          * To avoid this, disable RX-filter flags only for Lancer.
3529          */
3530         if (lancer_chip(adapter)) {
3531                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3532                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3533         }
3534 }
3535
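/* ndo_stop: tear down the data path. Pending config work is flushed
 * first, RX filters and NAPI are disabled, async MCC is stopped, TX is
 * quiesced and its completions drained, the RX rings are destroyed, the
 * EQs are cleaned after their IRQs have been synchronized, and finally
 * the IRQs are unregistered.
 */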
3536 static int be_close(struct net_device *netdev)
3537 {
3538         struct be_adapter *adapter = netdev_priv(netdev);
3539         struct be_eq_obj *eqo;
3540         int i;
3541
3542         /* This protection is needed as be_close() may be called even when the
3543          * adapter is in cleared state (after eeh perm failure)
3544          */
3545         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3546                 return 0;
3547
3548         /* Before attempting cleanup ensure all the pending cmds in the
3549          * config_wq have finished execution
3550          */
3551         flush_workqueue(be_wq);
3552
3553         be_disable_if_filters(adapter);
3554
3555         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3556                 for_all_evt_queues(adapter, eqo, i) {
3557                         napi_disable(&eqo->napi);
3558                 }
3559                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3560         }
3561
3562         be_async_mcc_disable(adapter);
3563
3564         /* Wait for all pending tx completions to arrive so that
3565          * all tx skbs are freed.
3566          */
3567         netif_tx_disable(netdev);
3568         be_tx_compl_clean(adapter);
3569
3570         be_rx_qs_destroy(adapter);
3571
3572         for_all_evt_queues(adapter, eqo, i) {
3573                 if (msix_enabled(adapter))
3574                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3575                 else
3576                         synchronize_irq(netdev->irq);
3577                 be_eq_clean(eqo);
3578         }
3579
3580         be_irq_unregister(adapter);
3581
3582         return 0;
3583 }
3584
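/* Allocate and create the RX rings: a default (non-RSS) RXQ if needed,
 * followed by the RSS RXQs. When more than one RXQ exists, the RSS
 * indirection table is filled round-robin with the ring IDs, TCP/IP
 * hashing is enabled (plus UDP hashing on non-BEx chips) and a random
 * hash key is programmed; otherwise RSS is disabled. Each ring is then
 * primed with one buffer less than the ring size (head == tail means
 * the queue is empty).
 */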
3585 static int be_rx_qs_create(struct be_adapter *adapter)
3586 {
3587         struct rss_info *rss = &adapter->rss_info;
3588         u8 rss_key[RSS_HASH_KEY_LEN];
3589         struct be_rx_obj *rxo;
3590         int rc, i, j;
3591
3592         for_all_rx_queues(adapter, rxo, i) {
3593                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3594                                     sizeof(struct be_eth_rx_d));
3595                 if (rc)
3596                         return rc;
3597         }
3598
3599         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3600                 rxo = default_rxo(adapter);
3601                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3602                                        rx_frag_size, adapter->if_handle,
3603                                        false, &rxo->rss_id);
3604                 if (rc)
3605                         return rc;
3606         }
3607
3608         for_all_rss_queues(adapter, rxo, i) {
3609                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3610                                        rx_frag_size, adapter->if_handle,
3611                                        true, &rxo->rss_id);
3612                 if (rc)
3613                         return rc;
3614         }
3615
3616         if (be_multi_rxq(adapter)) {
3617                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3618                         for_all_rss_queues(adapter, rxo, i) {
3619                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3620                                         break;
3621                                 rss->rsstable[j + i] = rxo->rss_id;
3622                                 rss->rss_queue[j + i] = i;
3623                         }
3624                 }
3625                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3626                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3627
3628                 if (!BEx_chip(adapter))
3629                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3630                                 RSS_ENABLE_UDP_IPV6;
3631
3632                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3633                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3634                                        RSS_INDIR_TABLE_LEN, rss_key);
3635                 if (rc) {
3636                         rss->rss_flags = RSS_ENABLE_NONE;
3637                         return rc;
3638                 }
3639
3640                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3641         } else {
3642                 /* Disable RSS, if only default RX Q is created */
3643                 rss->rss_flags = RSS_ENABLE_NONE;
3644         }
3645
3646
3647         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3648          * which is a queue empty condition
3649          */
3650         for_all_rx_queues(adapter, rxo, i)
3651                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3652
3653         return 0;
3654 }
3655
3656 static int be_enable_if_filters(struct be_adapter *adapter)
3657 {
3658         int status;
3659
3660         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3661         if (status)
3662                 return status;
3663
3664         /* Normally this condition is true as the ->dev_mac is zeroed.
3665          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3666          * subsequent be_dev_mac_add() can fail (after fresh boot)
3667          */
3668         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3669                 int old_pmac_id = -1;
3670
3671                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3672                 if (!is_zero_ether_addr(adapter->dev_mac))
3673                         old_pmac_id = adapter->pmac_id[0];
3674
3675                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3676                 if (status)
3677                         return status;
3678
3679                 /* Delete the old programmed MAC as we successfully programmed
3680                  * a new MAC
3681                  */
3682                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3683                         be_dev_mac_del(adapter, old_pmac_id);
3684
3685                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3686         }
3687
3688         if (adapter->vlans_added)
3689                 be_vid_config(adapter);
3690
3691         __be_set_rx_mode(adapter);
3692
3693         return 0;
3694 }
3695
3696 static int be_open(struct net_device *netdev)
3697 {
3698         struct be_adapter *adapter = netdev_priv(netdev);
3699         struct be_eq_obj *eqo;
3700         struct be_rx_obj *rxo;
3701         struct be_tx_obj *txo;
3702         u8 link_status;
3703         int status, i;
3704
3705         status = be_rx_qs_create(adapter);
3706         if (status)
3707                 goto err;
3708
3709         status = be_enable_if_filters(adapter);
3710         if (status)
3711                 goto err;
3712
3713         status = be_irq_register(adapter);
3714         if (status)
3715                 goto err;
3716
3717         for_all_rx_queues(adapter, rxo, i)
3718                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3719
3720         for_all_tx_queues(adapter, txo, i)
3721                 be_cq_notify(adapter, txo->cq.id, true, 0);
3722
3723         be_async_mcc_enable(adapter);
3724
3725         for_all_evt_queues(adapter, eqo, i) {
3726                 napi_enable(&eqo->napi);
3727                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3728         }
3729         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3730
3731         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3732         if (!status)
3733                 be_link_status_update(adapter, link_status);
3734
3735         netif_tx_start_all_queues(netdev);
3736         if (skyhawk_chip(adapter))
3737                 udp_tunnel_get_rx_info(netdev);
3738
3739         return 0;
3740 err:
3741         be_close(adapter->netdev);
3742         return -EIO;
3743 }
3744
3745 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3746 {
3747         u32 addr;
3748
3749         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3750
3751         mac[5] = (u8)(addr & 0xFF);
3752         mac[4] = (u8)((addr >> 8) & 0xFF);
3753         mac[3] = (u8)((addr >> 16) & 0xFF);
3754         /* Use the OUI from the current MAC address */
3755         memcpy(mac, adapter->netdev->dev_addr, 3);
3756 }
3757
3758 /*
3759  * Generate a seed MAC address from the PF MAC Address using jhash.
3760  * MAC addresses for VFs are assigned incrementally starting from the seed.
3761  * These addresses are programmed in the ASIC by the PF and the VF driver
3762  * queries for the MAC address during its probe.
3763  */
3764 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3765 {
3766         u32 vf;
3767         int status = 0;
3768         u8 mac[ETH_ALEN];
3769         struct be_vf_cfg *vf_cfg;
3770
3771         be_vf_eth_addr_generate(adapter, mac);
3772
3773         for_all_vfs(adapter, vf_cfg, vf) {
3774                 if (BEx_chip(adapter))
3775                         status = be_cmd_pmac_add(adapter, mac,
3776                                                  vf_cfg->if_handle,
3777                                                  &vf_cfg->pmac_id, vf + 1);
3778                 else
3779                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3780                                                 vf + 1);
3781
3782                 if (status)
3783                         dev_err(&adapter->pdev->dev,
3784                                 "Mac address assignment failed for VF %d\n",
3785                                 vf);
3786                 else
3787                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3788
3789                 mac[5] += 1;
3790         }
3791         return status;
3792 }
3793
3794 static int be_vfs_mac_query(struct be_adapter *adapter)
3795 {
3796         int status, vf;
3797         u8 mac[ETH_ALEN];
3798         struct be_vf_cfg *vf_cfg;
3799
3800         for_all_vfs(adapter, vf_cfg, vf) {
3801                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3802                                                mac, vf_cfg->if_handle,
3803                                                false, vf+1);
3804                 if (status)
3805                         return status;
3806                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3807         }
3808         return 0;
3809 }
3810
3811 static void be_vf_clear(struct be_adapter *adapter)
3812 {
3813         struct be_vf_cfg *vf_cfg;
3814         u32 vf;
3815
3816         if (pci_vfs_assigned(adapter->pdev)) {
3817                 dev_warn(&adapter->pdev->dev,
3818                          "VFs are assigned to VMs: not disabling VFs\n");
3819                 goto done;
3820         }
3821
3822         pci_disable_sriov(adapter->pdev);
3823
3824         for_all_vfs(adapter, vf_cfg, vf) {
3825                 if (BEx_chip(adapter))
3826                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3827                                         vf_cfg->pmac_id, vf + 1);
3828                 else
3829                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3830                                        vf + 1);
3831
3832                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3833         }
3834
3835         if (BE3_chip(adapter))
3836                 be_cmd_set_hsw_config(adapter, 0, 0,
3837                                       adapter->if_handle,
3838                                       PORT_FWD_TYPE_PASSTHRU, 0);
3839 done:
3840         kfree(adapter->vf_cfg);
3841         adapter->num_vfs = 0;
3842         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3843 }
3844
3845 static void be_clear_queues(struct be_adapter *adapter)
3846 {
3847         be_mcc_queues_destroy(adapter);
3848         be_rx_cqs_destroy(adapter);
3849         be_tx_queues_destroy(adapter);
3850         be_evt_queues_destroy(adapter);
3851 }
3852
3853 static void be_cancel_worker(struct be_adapter *adapter)
3854 {
3855         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3856                 cancel_delayed_work_sync(&adapter->work);
3857                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3858         }
3859 }
3860
3861 static void be_cancel_err_detection(struct be_adapter *adapter)
3862 {
3863         struct be_error_recovery *err_rec = &adapter->error_recovery;
3864
3865         if (!be_err_recovery_workq)
3866                 return;
3867
3868         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3869                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3870                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3871         }
3872 }
3873
3874 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3875 {
3876         struct net_device *netdev = adapter->netdev;
3877         struct device *dev = &adapter->pdev->dev;
3878         struct be_vxlan_port *vxlan_port;
3879         __be16 port;
3880         int status;
3881
3882         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3883                                       struct be_vxlan_port, list);
3884         port = vxlan_port->port;
3885
3886         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3887                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3888         if (status) {
3889                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3890                 return status;
3891         }
3892         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3893
3894         status = be_cmd_set_vxlan_port(adapter, port);
3895         if (status) {
3896                 dev_warn(dev, "Failed to add VxLAN port\n");
3897                 return status;
3898         }
3899         adapter->vxlan_port = port;
3900
3901         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3902                                    NETIF_F_TSO | NETIF_F_TSO6 |
3903                                    NETIF_F_GSO_UDP_TUNNEL;
3904
3905         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3906                  be16_to_cpu(port));
3907         return 0;
3908 }
3909
3910 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3911 {
3912         struct net_device *netdev = adapter->netdev;
3913
3914         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3915                 be_cmd_manage_iface(adapter, adapter->if_handle,
3916                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3917
3918         if (adapter->vxlan_port)
3919                 be_cmd_set_vxlan_port(adapter, 0);
3920
3921         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3922         adapter->vxlan_port = 0;
3923
3924         netdev->hw_enc_features = 0;
3925 }
3926
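/* Work out the per-VF resource template handed to be_cmd_set_sriov_config()
 * when SR-IOV is (re)configured. Queue resources are split evenly between
 * the PF and its VFs; for example (illustrative numbers only), with 32 RSS
 * queues in the pool and 7 VFs requested, each of the 8 functions gets
 * 32 / (7 + 1) = 4 RSS queues, subject to the SH_VF_MAX_NIC_EQS cap. If
 * the requested VF count reaches the PF pool's RSS-table limit, VFs fall
 * back to a single queue pair.
 */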
3927 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3928                                 struct be_resources *vft_res)
3929 {
3930         struct be_resources res = adapter->pool_res;
3931         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3932         struct be_resources res_mod = {0};
3933         u16 num_vf_qs = 1;
3934
3935         /* Distribute the queue resources among the PF and its VFs */
3936         if (num_vfs) {
3937                 /* Divide the rx queues evenly among the VFs and the PF, capped
3938                  * at VF-EQ-count. Any remainder queues belong to the PF.
3939                  */
3940                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3941                                 res.max_rss_qs / (num_vfs + 1));
3942
3943                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3944                  * RSS Tables per port. Provide RSS on VFs, only if number of
3945                  * VFs requested is less than its PF Pool's RSS Tables limit.
3946                  */
3947                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3948                         num_vf_qs = 1;
3949         }
3950
3951         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3952          * which are modifiable using SET_PROFILE_CONFIG cmd.
3953          */
3954         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3955                                   RESOURCE_MODIFIABLE, 0);
3956
3957         /* If RSS IFACE capability flags are modifiable for a VF, set the
3958          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3959          * more than 1 RSSQ is available for a VF.
3960          * Otherwise, provision only 1 queue pair for VF.
3961          */
3962         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3963                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3964                 if (num_vf_qs > 1) {
3965                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3966                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3967                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3968                 } else {
3969                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3970                                              BE_IF_FLAGS_DEFQ_RSS);
3971                 }
3972         } else {
3973                 num_vf_qs = 1;
3974         }
3975
3976         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3977                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3978                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3979         }
3980
3981         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3982         vft_res->max_rx_qs = num_vf_qs;
3983         vft_res->max_rss_qs = num_vf_qs;
3984         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3985         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3986
3987         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3988          * among the PF and its VFs, if the fields are changeable
3989          */
3990         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3991                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3992
3993         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3994                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3995
3996         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3997                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3998
3999         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4000                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4001 }
4002
4003 static void be_if_destroy(struct be_adapter *adapter)
4004 {
4005         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4006
4007         kfree(adapter->pmac_id);
4008         adapter->pmac_id = NULL;
4009
4010         kfree(adapter->mc_list);
4011         adapter->mc_list = NULL;
4012
4013         kfree(adapter->uc_list);
4014         adapter->uc_list = NULL;
4015 }
4016
4017 static int be_clear(struct be_adapter *adapter)
4018 {
4019         struct pci_dev *pdev = adapter->pdev;
4020         struct  be_resources vft_res = {0};
4021
4022         be_cancel_worker(adapter);
4023
4024         flush_workqueue(be_wq);
4025
4026         if (sriov_enabled(adapter))
4027                 be_vf_clear(adapter);
4028
4029         /* Re-configure FW to distribute resources evenly across max-supported
4030          * number of VFs, only when VFs are not already enabled.
4031          */
4032         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4033             !pci_vfs_assigned(pdev)) {
4034                 be_calculate_vf_res(adapter,
4035                                     pci_sriov_get_totalvfs(pdev),
4036                                     &vft_res);
4037                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4038                                         pci_sriov_get_totalvfs(pdev),
4039                                         &vft_res);
4040         }
4041
4042         be_disable_vxlan_offloads(adapter);
4043
4044         be_if_destroy(adapter);
4045
4046         be_clear_queues(adapter);
4047
4048         be_msix_disable(adapter);
4049         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4050         return 0;
4051 }
4052
4053 static int be_vfs_if_create(struct be_adapter *adapter)
4054 {
4055         struct be_resources res = {0};
4056         u32 cap_flags, en_flags, vf;
4057         struct be_vf_cfg *vf_cfg;
4058         int status;
4059
4060         /* If a FW profile exists, then cap_flags are updated */
4061         cap_flags = BE_VF_IF_EN_FLAGS;
4062
4063         for_all_vfs(adapter, vf_cfg, vf) {
4064                 if (!BE3_chip(adapter)) {
4065                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4066                                                            ACTIVE_PROFILE_TYPE,
4067                                                            RESOURCE_LIMITS,
4068                                                            vf + 1);
4069                         if (!status) {
4070                                 cap_flags = res.if_cap_flags;
4071                                 /* Prevent VFs from enabling VLAN promiscuous
4072                                  * mode
4073                                  */
4074                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4075                         }
4076                 }
4077
4078                 /* PF should enable IF flags during proxy if_create call */
4079                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4080                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4081                                           &vf_cfg->if_handle, vf + 1);
4082                 if (status)
4083                         return status;
4084         }
4085
4086         return 0;
4087 }
4088
4089 static int be_vf_setup_init(struct be_adapter *adapter)
4090 {
4091         struct be_vf_cfg *vf_cfg;
4092         int vf;
4093
4094         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4095                                   GFP_KERNEL);
4096         if (!adapter->vf_cfg)
4097                 return -ENOMEM;
4098
4099         for_all_vfs(adapter, vf_cfg, vf) {
4100                 vf_cfg->if_handle = -1;
4101                 vf_cfg->pmac_id = -1;
4102         }
4103         return 0;
4104 }
4105
4106 static int be_vf_setup(struct be_adapter *adapter)
4107 {
4108         struct device *dev = &adapter->pdev->dev;
4109         struct be_vf_cfg *vf_cfg;
4110         int status, old_vfs, vf;
4111         bool spoofchk;
4112
4113         old_vfs = pci_num_vf(adapter->pdev);
4114
4115         status = be_vf_setup_init(adapter);
4116         if (status)
4117                 goto err;
4118
4119         if (old_vfs) {
4120                 for_all_vfs(adapter, vf_cfg, vf) {
4121                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4122                         if (status)
4123                                 goto err;
4124                 }
4125
4126                 status = be_vfs_mac_query(adapter);
4127                 if (status)
4128                         goto err;
4129         } else {
4130                 status = be_vfs_if_create(adapter);
4131                 if (status)
4132                         goto err;
4133
4134                 status = be_vf_eth_addr_config(adapter);
4135                 if (status)
4136                         goto err;
4137         }
4138
4139         for_all_vfs(adapter, vf_cfg, vf) {
4140                 /* Allow VFs to program MAC/VLAN filters */
4141                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4142                                                   vf + 1);
4143                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4144                         status = be_cmd_set_fn_privileges(adapter,
4145                                                           vf_cfg->privileges |
4146                                                           BE_PRIV_FILTMGMT,
4147                                                           vf + 1);
4148                         if (!status) {
4149                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4150                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4151                                          vf);
4152                         }
4153                 }
4154
4155                 /* Allow full available bandwidth */
4156                 if (!old_vfs)
4157                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4158
4159                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4160                                                vf_cfg->if_handle, NULL,
4161                                                &spoofchk);
4162                 if (!status)
4163                         vf_cfg->spoofchk = spoofchk;
4164
4165                 if (!old_vfs) {
4166                         be_cmd_enable_vf(adapter, vf + 1);
4167                         be_cmd_set_logical_link_config(adapter,
4168                                                        IFLA_VF_LINK_STATE_AUTO,
4169                                                        vf+1);
4170                 }
4171         }
4172
4173         if (!old_vfs) {
4174                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4175                 if (status) {
4176                         dev_err(dev, "SRIOV enable failed\n");
4177                         adapter->num_vfs = 0;
4178                         goto err;
4179                 }
4180         }
4181
4182         if (BE3_chip(adapter)) {
4183                 /* On BE3, enable VEB only when SRIOV is enabled */
4184                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4185                                                adapter->if_handle,
4186                                                PORT_FWD_TYPE_VEB, 0);
4187                 if (status)
4188                         goto err;
4189         }
4190
4191         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4192         return 0;
4193 err:
4194         dev_err(dev, "VF setup failed\n");
4195         be_vf_clear(adapter);
4196         return status;
4197 }
4198
4199 /* Converting function_mode bits on BE3 to SH mc_type enums */
4200
4201 static u8 be_convert_mc_type(u32 function_mode)
4202 {
4203         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4204                 return vNIC1;
4205         else if (function_mode & QNQ_MODE)
4206                 return FLEX10;
4207         else if (function_mode & VNIC_MODE)
4208                 return vNIC2;
4209         else if (function_mode & UMC_ENABLED)
4210                 return UMC;
4211         else
4212                 return MC_NONE;
4213 }
4214
4215 /* On BE2/BE3 FW does not suggest the supported limits */
4216 static void BEx_get_resources(struct be_adapter *adapter,
4217                               struct be_resources *res)
4218 {
4219         bool use_sriov = adapter->num_vfs ? 1 : 0;
4220
4221         if (be_physfn(adapter))
4222                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4223         else
4224                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4225
4226         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4227
4228         if (be_is_mc(adapter)) {
4229                 /* Assuming that there are 4 channels per port
4230                  * when multi-channel is enabled
4231                  */
4232                 if (be_is_qnq_mode(adapter))
4233                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4234                 else
4235                         /* In a non-qnq multichannel mode, the pvid
4236                          * takes up one vlan entry
4237                          */
4238                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4239         } else {
4240                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4241         }
4242
4243         res->max_mcast_mac = BE_MAX_MC;
4244
4245         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4246          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4247          *    *only* if it is RSS-capable.
4248          */
4249         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4250             be_virtfn(adapter) ||
4251             (be_is_mc(adapter) &&
4252              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4253                 res->max_tx_qs = 1;
4254         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4255                 struct be_resources super_nic_res = {0};
4256
4257                 /* On a SuperNIC profile, the driver needs to use the
4258                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4259                  */
4260                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4261                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4262                                           0);
4263                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4264                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4265         } else {
4266                 res->max_tx_qs = BE3_MAX_TX_QS;
4267         }
4268
4269         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4270             !use_sriov && be_physfn(adapter))
4271                 res->max_rss_qs = (adapter->be3_native) ?
4272                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4273         res->max_rx_qs = res->max_rss_qs + 1;
4274
4275         if (be_physfn(adapter))
4276                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4277                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4278         else
4279                 res->max_evt_qs = 1;
4280
4281         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4282         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4283         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4284                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4285 }
4286
4287 static void be_setup_init(struct be_adapter *adapter)
4288 {
4289         adapter->vlan_prio_bmap = 0xff;
4290         adapter->phy.link_speed = -1;
4291         adapter->if_handle = -1;
4292         adapter->be3_native = false;
4293         adapter->if_flags = 0;
4294         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4295         if (be_physfn(adapter))
4296                 adapter->cmd_privileges = MAX_PRIVILEGES;
4297         else
4298                 adapter->cmd_privileges = MIN_PRIVILEGES;
4299 }
4300
4301 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4302  * However, this HW limitation is not exposed to the host via any SLI cmd.
4303  * As a result, in the case of SRIOV and in particular multi-partition configs
4304  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4305  * for distribution between the VFs. This self-imposed limit will determine the
4306  * number of VFs for which RSS can be enabled.
4307  */
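/* Worked example with purely illustrative numbers: if the port exposes
 * 64 RSS Policy Tables and has 2 NIC PFs, rss_tables_on_port below is
 * 64 - 2 = 62; a PF-pool with 32 max VFs out of 64 max VFs on the port
 * then gets 32 * 62 / 64 = 31 tables.
 */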
4308 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4309 {
4310         struct be_port_resources port_res = {0};
4311         u8 rss_tables_on_port;
4312         u16 max_vfs = be_max_vfs(adapter);
4313
4314         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4315                                   RESOURCE_LIMITS, 0);
4316
4317         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4318
4319         /* Each PF Pool's RSS Tables limit =
4320          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4321          */
4322         adapter->pool_res.max_rss_tables =
4323                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4324 }
4325
4326 static int be_get_sriov_config(struct be_adapter *adapter)
4327 {
4328         struct be_resources res = {0};
4329         int max_vfs, old_vfs;
4330
4331         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4332                                   RESOURCE_LIMITS, 0);
4333
4334         /* Some old versions of BE3 FW don't report max_vfs value */
4335         if (BE3_chip(adapter) && !res.max_vfs) {
4336                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4337                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4338         }
4339
4340         adapter->pool_res = res;
4341
4342         /* If, during a previous unload of the driver, the VFs were not disabled,
4343          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4344          * Instead, use the TotalVFs value stored in the pci-dev struct.
4345          */
4346         old_vfs = pci_num_vf(adapter->pdev);
4347         if (old_vfs) {
4348                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4349                          old_vfs);
4350
4351                 adapter->pool_res.max_vfs =
4352                         pci_sriov_get_totalvfs(adapter->pdev);
4353                 adapter->num_vfs = old_vfs;
4354         }
4355
4356         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4357                 be_calculate_pf_pool_rss_tables(adapter);
4358                 dev_info(&adapter->pdev->dev,
4359                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4360                          be_max_pf_pool_rss_tables(adapter));
4361         }
4362         return 0;
4363 }
4364
4365 static void be_alloc_sriov_res(struct be_adapter *adapter)
4366 {
4367         int old_vfs = pci_num_vf(adapter->pdev);
4368         struct  be_resources vft_res = {0};
4369         int status;
4370
4371         be_get_sriov_config(adapter);
4372
4373         if (!old_vfs)
4374                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4375
4376         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4377          * resources are given to the PF during driver load, if there are no
4378          * old VFs. This facility is not available in BE3 FW.
4379          * Also, this is done by the FW on the Lancer chip.
4380          */
4381         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4382                 be_calculate_vf_res(adapter, 0, &vft_res);
4383                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4384                                                  &vft_res);
4385                 if (status)
4386                         dev_err(&adapter->pdev->dev,
4387                                 "Failed to optimize SRIOV resources\n");
4388         }
4389 }
4390
4391 static int be_get_resources(struct be_adapter *adapter)
4392 {
4393         struct device *dev = &adapter->pdev->dev;
4394         struct be_resources res = {0};
4395         int status;
4396
4397         /* For Lancer, SH etc., read per-function resource limits from FW.
4398          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4399          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4400          */
4401         if (BEx_chip(adapter)) {
4402                 BEx_get_resources(adapter, &res);
4403         } else {
4404                 status = be_cmd_get_func_config(adapter, &res);
4405                 if (status)
4406                         return status;
4407
4408                 /* If a default RXQ must be created, we'll use up one RSSQ */
4409                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4410                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4411                         res.max_rss_qs -= 1;
4412         }
4413
4414         /* If RoCE is supported, stash away half the EQs for RoCE */
4415         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4416                                 res.max_evt_qs / 2 : res.max_evt_qs;
4417         adapter->res = res;
4418
4419         /* If FW supports an RSS default queue, then skip creating a non-RSS
4420          * queue for non-IP traffic.
4421          */
4422         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4423                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4424
4425         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4426                  be_max_txqs(adapter), be_max_rxqs(adapter),
4427                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4428                  be_max_vfs(adapter));
4429         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4430                  be_max_uc(adapter), be_max_mc(adapter),
4431                  be_max_vlans(adapter));
4432
4433         /* Ensure RX and TX queues are created in pairs at init time */
4434         adapter->cfg_num_rx_irqs =
4435                                 min_t(u16, netif_get_num_default_rss_queues(),
4436                                       be_max_qp_irqs(adapter));
4437         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4438         return 0;
4439 }
4440
4441 static int be_get_config(struct be_adapter *adapter)
4442 {
4443         int status, level;
4444         u16 profile_id;
4445
4446         status = be_cmd_get_cntl_attributes(adapter);
4447         if (status)
4448                 return status;
4449
4450         status = be_cmd_query_fw_cfg(adapter);
4451         if (status)
4452                 return status;
4453
4454         if (!lancer_chip(adapter) && be_physfn(adapter))
4455                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4456
4457         if (BEx_chip(adapter)) {
4458                 level = be_cmd_get_fw_log_level(adapter);
4459                 adapter->msg_enable =
4460                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4461         }
4462
4463         be_cmd_get_acpi_wol_cap(adapter);
4464         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4465         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4466
4467         be_cmd_query_port_name(adapter);
4468
4469         if (be_physfn(adapter)) {
4470                 status = be_cmd_get_active_profile(adapter, &profile_id);
4471                 if (!status)
4472                         dev_info(&adapter->pdev->dev,
4473                                  "Using profile 0x%x\n", profile_id);
4474         }
4475
4476         return 0;
4477 }
4478
4479 static int be_mac_setup(struct be_adapter *adapter)
4480 {
4481         u8 mac[ETH_ALEN];
4482         int status;
4483
4484         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4485                 status = be_cmd_get_perm_mac(adapter, mac);
4486                 if (status)
4487                         return status;
4488
4489                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4490                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4491
4492                 /* Initial MAC for BE3 VFs is already programmed by PF */
4493                 if (BEx_chip(adapter) && be_virtfn(adapter))
4494                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4495         }
4496
4497         return 0;
4498 }
4499
4500 static void be_schedule_worker(struct be_adapter *adapter)
4501 {
4502         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4503         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4504 }
4505
4506 static void be_destroy_err_recovery_workq(void)
4507 {
4508         if (!be_err_recovery_workq)
4509                 return;
4510
4511         flush_workqueue(be_err_recovery_workq);
4512         destroy_workqueue(be_err_recovery_workq);
4513         be_err_recovery_workq = NULL;
4514 }
4515
4516 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4517 {
4518         struct be_error_recovery *err_rec = &adapter->error_recovery;
4519
4520         if (!be_err_recovery_workq)
4521                 return;
4522
4523         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4524                            msecs_to_jiffies(delay));
4525         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4526 }
4527
4528 static int be_setup_queues(struct be_adapter *adapter)
4529 {
4530         struct net_device *netdev = adapter->netdev;
4531         int status;
4532
4533         status = be_evt_queues_create(adapter);
4534         if (status)
4535                 goto err;
4536
4537         status = be_tx_qs_create(adapter);
4538         if (status)
4539                 goto err;
4540
4541         status = be_rx_cqs_create(adapter);
4542         if (status)
4543                 goto err;
4544
4545         status = be_mcc_queues_create(adapter);
4546         if (status)
4547                 goto err;
4548
4549         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4550         if (status)
4551                 goto err;
4552
4553         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4554         if (status)
4555                 goto err;
4556
4557         return 0;
4558 err:
4559         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4560         return status;
4561 }
4562
4563 static int be_if_create(struct be_adapter *adapter)
4564 {
4565         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4566         u32 cap_flags = be_if_cap_flags(adapter);
4567         int status;
4568
4569         /* alloc required memory for other filtering fields */
4570         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4571                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4572         if (!adapter->pmac_id)
4573                 return -ENOMEM;
4574
4575         adapter->mc_list = kcalloc(be_max_mc(adapter),
4576                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4577         if (!adapter->mc_list)
4578                 return -ENOMEM;
4579
4580         adapter->uc_list = kcalloc(be_max_uc(adapter),
4581                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4582         if (!adapter->uc_list)
4583                 return -ENOMEM;
4584
4585         if (adapter->cfg_num_rx_irqs == 1)
4586                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4587
4588         en_flags &= cap_flags;
4589         /* will enable all the needed filter flags in be_open() */
4590         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4591                                   &adapter->if_handle, 0);
4592
4593         if (status)
4594                 return status;
4595
4596         return 0;
4597 }
4598
4599 int be_update_queues(struct be_adapter *adapter)
4600 {
4601         struct net_device *netdev = adapter->netdev;
4602         int status;
4603
4604         if (netif_running(netdev)) {
4605                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4606                 netif_carrier_off(netdev);
4607
4608                 be_close(netdev);
4609         }
4610
4611         be_cancel_worker(adapter);
4612
4613         /* If any vectors have been shared with RoCE we cannot re-program
4614          * the MSIx table.
4615          */
4616         if (!adapter->num_msix_roce_vec)
4617                 be_msix_disable(adapter);
4618
4619         be_clear_queues(adapter);
4620         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4621         if (status)
4622                 return status;
4623
4624         if (!msix_enabled(adapter)) {
4625                 status = be_msix_enable(adapter);
4626                 if (status)
4627                         return status;
4628         }
4629
4630         status = be_if_create(adapter);
4631         if (status)
4632                 return status;
4633
4634         status = be_setup_queues(adapter);
4635         if (status)
4636                 return status;
4637
4638         be_schedule_worker(adapter);
4639
4640         /*
4641          * The IF was destroyed and re-created. We need to clear
4642          * all promiscuous flags valid for the destroyed IF.
4643          * Without this promisc mode is not restored during
4644          * be_open() because the driver thinks that it is
4645          * already enabled in HW.
4646          */
4647         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4648
4649         if (netif_running(netdev))
4650                 status = be_open(netdev);
4651
4652         return status;
4653 }
4654
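/* Parse the major number out of a dotted FW version string: an
 * (illustrative) version string such as "4.6.281.26" yields 4, while a
 * string that does not begin with a decimal number yields 0.
 */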
4655 static inline int fw_major_num(const char *fw_ver)
4656 {
4657         int fw_major = 0, i;
4658
4659         i = sscanf(fw_ver, "%d.", &fw_major);
4660         if (i != 1)
4661                 return 0;
4662
4663         return fw_major;
4664 }
4665
4666 /* If this is error recovery, FLR the PF.
4667  * Otherwise, if any VFs are already enabled, don't FLR the PF.
4668  */
4669 static bool be_reset_required(struct be_adapter *adapter)
4670 {
4671         if (be_error_recovering(adapter))
4672                 return true;
4673         else
4674                 return pci_num_vf(adapter->pdev) == 0;
4675 }
4676
4677 /* Wait for the FW to be ready and perform the required initialization */
4678 static int be_func_init(struct be_adapter *adapter)
4679 {
4680         int status;
4681
4682         status = be_fw_wait_ready(adapter);
4683         if (status)
4684                 return status;
4685
4686         /* FW is now ready; clear errors to allow cmds/doorbell */
4687         be_clear_error(adapter, BE_CLEAR_ALL);
4688
4689         if (be_reset_required(adapter)) {
4690                 status = be_cmd_reset_function(adapter);
4691                 if (status)
4692                         return status;
4693
4694                 /* Wait for interrupts to quiesce after an FLR */
4695                 msleep(100);
4696         }
4697
4698         /* Tell FW we're ready to fire cmds */
4699         status = be_cmd_fw_init(adapter);
4700         if (status)
4701                 return status;
4702
4703         /* Allow interrupts for other ULPs running on NIC function */
4704         be_intr_set(adapter, true);
4705
4706         return 0;
4707 }
4708
4709 static int be_setup(struct be_adapter *adapter)
4710 {
4711         struct device *dev = &adapter->pdev->dev;
4712         int status;
4713
4714         status = be_func_init(adapter);
4715         if (status)
4716                 return status;
4717
4718         be_setup_init(adapter);
4719
4720         if (!lancer_chip(adapter))
4721                 be_cmd_req_native_mode(adapter);
4722
4723         /* invoke this cmd first to get pf_num and vf_num which are needed
4724          * for issuing profile related cmds
4725          */
4726         if (!BEx_chip(adapter)) {
4727                 status = be_cmd_get_func_config(adapter, NULL);
4728                 if (status)
4729                         return status;
4730         }
4731
4732         status = be_get_config(adapter);
4733         if (status)
4734                 goto err;
4735
4736         if (!BE2_chip(adapter) && be_physfn(adapter))
4737                 be_alloc_sriov_res(adapter);
4738
4739         status = be_get_resources(adapter);
4740         if (status)
4741                 goto err;
4742
4743         status = be_msix_enable(adapter);
4744         if (status)
4745                 goto err;
4746
4747         /* will enable all the needed filter flags in be_open() */
4748         status = be_if_create(adapter);
4749         if (status)
4750                 goto err;
4751
4752         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4753         rtnl_lock();
4754         status = be_setup_queues(adapter);
4755         rtnl_unlock();
4756         if (status)
4757                 goto err;
4758
4759         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4760
4761         status = be_mac_setup(adapter);
4762         if (status)
4763                 goto err;
4764
4765         be_cmd_get_fw_ver(adapter);
4766         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4767
4768         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4769                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4770                         adapter->fw_ver);
4771                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4772         }
4773
4774         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4775                                          adapter->rx_fc);
4776         if (status)
4777                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4778                                         &adapter->rx_fc);
4779
4780         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4781                  adapter->tx_fc, adapter->rx_fc);
4782
4783         if (be_physfn(adapter))
4784                 be_cmd_set_logical_link_config(adapter,
4785                                                IFLA_VF_LINK_STATE_AUTO, 0);
4786
4787         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4788          * confusing any Linux bridge or OVS it might be connected to.
4789          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4790          * when SRIOV is not enabled.
4791          */
4792         if (BE3_chip(adapter))
4793                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4794                                       PORT_FWD_TYPE_PASSTHRU, 0);
4795
4796         if (adapter->num_vfs)
4797                 be_vf_setup(adapter);
4798
4799         status = be_cmd_get_phy_info(adapter);
4800         if (!status && be_pause_supported(adapter))
4801                 adapter->phy.fc_autoneg = 1;
4802
4803         if (be_physfn(adapter) && !lancer_chip(adapter))
4804                 be_cmd_set_features(adapter);
4805
4806         be_schedule_worker(adapter);
4807         adapter->flags |= BE_FLAGS_SETUP_DONE;
4808         return 0;
4809 err:
4810         be_clear(adapter);
4811         return status;
4812 }
4813
4814 #ifdef CONFIG_NET_POLL_CONTROLLER
4815 static void be_netpoll(struct net_device *netdev)
4816 {
4817         struct be_adapter *adapter = netdev_priv(netdev);
4818         struct be_eq_obj *eqo;
4819         int i;
4820
4821         for_all_evt_queues(adapter, eqo, i) {
4822                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4823                 napi_schedule(&eqo->napi);
4824         }
4825 }
4826 #endif
4827
4828 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4829 {
4830         const struct firmware *fw;
4831         int status;
4832
4833         if (!netif_running(adapter->netdev)) {
4834                 dev_err(&adapter->pdev->dev,
4835                         "Firmware load not allowed (interface is down)\n");
4836                 return -ENETDOWN;
4837         }
4838
4839         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4840         if (status)
4841                 goto fw_exit;
4842
4843         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4844
4845         if (lancer_chip(adapter))
4846                 status = lancer_fw_download(adapter, fw);
4847         else
4848                 status = be_fw_download(adapter, fw);
4849
4850         if (!status)
4851                 be_cmd_get_fw_ver(adapter);
4852
4853 fw_exit:
4854         release_firmware(fw);
4855         return status;
4856 }
4857
4858 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4859                                  u16 flags)
4860 {
4861         struct be_adapter *adapter = netdev_priv(dev);
4862         struct nlattr *attr, *br_spec;
4863         int rem;
4864         int status = 0;
4865         u16 mode = 0;
4866
4867         if (!sriov_enabled(adapter))
4868                 return -EOPNOTSUPP;
4869
4870         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4871         if (!br_spec)
4872                 return -EINVAL;
4873
4874         nla_for_each_nested(attr, br_spec, rem) {
4875                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4876                         continue;
4877
4878                 if (nla_len(attr) < sizeof(mode))
4879                         return -EINVAL;
4880
4881                 mode = nla_get_u16(attr);
4882                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4883                         return -EOPNOTSUPP;
4884
4885                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4886                         return -EINVAL;
4887
4888                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4889                                                adapter->if_handle,
4890                                                mode == BRIDGE_MODE_VEPA ?
4891                                                PORT_FWD_TYPE_VEPA :
4892                                                PORT_FWD_TYPE_VEB, 0);
4893                 if (status)
4894                         goto err;
4895
4896                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4897                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4898
4899                 return status;
4900         }
4901 err:
4902         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4903                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4904
4905         return status;
4906 }
4907
4908 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4909                                  struct net_device *dev, u32 filter_mask,
4910                                  int nlflags)
4911 {
4912         struct be_adapter *adapter = netdev_priv(dev);
4913         int status = 0;
4914         u8 hsw_mode;
4915
4916         /* BE and Lancer chips support VEB mode only */
4917         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4918                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4919                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4920                         return 0;
4921                 hsw_mode = PORT_FWD_TYPE_VEB;
4922         } else {
4923                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4924                                                adapter->if_handle, &hsw_mode,
4925                                                NULL);
4926                 if (status)
4927                         return 0;
4928
4929                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4930                         return 0;
4931         }
4932
4933         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4934                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4935                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4936                                        0, 0, nlflags, filter_mask, NULL);
4937 }
4938
4939 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4940                                          void (*func)(struct work_struct *))
4941 {
4942         struct be_cmd_work *work;
4943
4944         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4945         if (!work) {
4946                 dev_err(&adapter->pdev->dev,
4947                         "be_work memory allocation failed\n");
4948                 return NULL;
4949         }
4950
4951         INIT_WORK(&work->work, func);
4952         work->adapter = adapter;
4953         return work;
4954 }
4955
4956 /* VxLAN offload Notes:
4957  *
4958  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4959  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4960  * is expected to work across all types of IP tunnels once exported. Skyhawk
4961  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4962  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4963  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4964  * those other tunnels are unexported on the fly through ndo_features_check().
4965  *
4966  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4967  * adds more than one port, disable offloads and re-enable them again when
4968  * there's only one port left. We maintain a list of ports for this purpose.
4969  */
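/* Example of the bookkeeping done by the two work handlers below (port
 * numbers are illustrative): adding port 4789 enables offloads for it;
 * adding 4789 again only bumps its port_aliases count; adding a second,
 * distinct port (say 8472) pushes vxlan_port_count above 1 and disables
 * offloads; once deletions leave a single port on the list, offloads are
 * re-enabled for that remaining port.
 */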
4970 static void be_work_add_vxlan_port(struct work_struct *work)
4971 {
4972         struct be_cmd_work *cmd_work =
4973                                 container_of(work, struct be_cmd_work, work);
4974         struct be_adapter *adapter = cmd_work->adapter;
4975         struct device *dev = &adapter->pdev->dev;
4976         __be16 port = cmd_work->info.vxlan_port;
4977         struct be_vxlan_port *vxlan_port;
4978         int status;
4979
4980         /* Bump up the alias count if it is an existing port */
4981         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4982                 if (vxlan_port->port == port) {
4983                         vxlan_port->port_aliases++;
4984                         goto done;
4985                 }
4986         }
4987
4988         /* Add a new port to our list. We don't need a lock here since port
4989          * add/delete are done only in the context of a single-threaded work
4990          * queue (be_wq).
4991          */
4992         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4993         if (!vxlan_port)
4994                 goto done;
4995
4996         vxlan_port->port = port;
4997         INIT_LIST_HEAD(&vxlan_port->list);
4998         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4999         adapter->vxlan_port_count++;
5000
5001         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5002                 dev_info(dev,
5003                          "Only one UDP port supported for VxLAN offloads\n");
5004                 dev_info(dev, "Disabling VxLAN offloads\n");
5005                 goto err;
5006         }
5007
5008         if (adapter->vxlan_port_count > 1)
5009                 goto done;
5010
5011         status = be_enable_vxlan_offloads(adapter);
5012         if (!status)
5013                 goto done;
5014
5015 err:
5016         be_disable_vxlan_offloads(adapter);
5017 done:
5018         kfree(cmd_work);
5019         return;
5020 }
5021
5022 static void be_work_del_vxlan_port(struct work_struct *work)
5023 {
5024         struct be_cmd_work *cmd_work =
5025                                 container_of(work, struct be_cmd_work, work);
5026         struct be_adapter *adapter = cmd_work->adapter;
5027         __be16 port = cmd_work->info.vxlan_port;
5028         struct be_vxlan_port *vxlan_port;
5029
5030         /* Nothing to be done if a port alias is being deleted */
5031         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5032                 if (vxlan_port->port == port) {
5033                         if (vxlan_port->port_aliases) {
5034                                 vxlan_port->port_aliases--;
5035                                 goto done;
5036                         }
5037                         break;
5038                 }
5039         }
5040
5041         /* No port aliases left; delete the port from the list */
5042         list_del(&vxlan_port->list);
5043         adapter->vxlan_port_count--;
5044
5045         /* Disable VxLAN offload if this is the offloaded port */
5046         if (adapter->vxlan_port == vxlan_port->port) {
5047                 WARN_ON(adapter->vxlan_port_count);
5048                 be_disable_vxlan_offloads(adapter);
5049                 dev_info(&adapter->pdev->dev,
5050                          "Disabled VxLAN offloads for UDP port %d\n",
5051                          be16_to_cpu(port));
5052                 goto out;
5053         }
5054
5055         /* If only 1 port is left, re-enable VxLAN offload */
5056         if (adapter->vxlan_port_count == 1)
5057                 be_enable_vxlan_offloads(adapter);
5058
5059 out:
5060         kfree(vxlan_port);
5061 done:
5062         kfree(cmd_work);
5063 }
5064
5065 static void be_cfg_vxlan_port(struct net_device *netdev,
5066                               struct udp_tunnel_info *ti,
5067                               void (*func)(struct work_struct *))
5068 {
5069         struct be_adapter *adapter = netdev_priv(netdev);
5070         struct be_cmd_work *cmd_work;
5071
5072         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5073                 return;
5074
5075         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5076                 return;
5077
5078         cmd_work = be_alloc_work(adapter, func);
5079         if (cmd_work) {
5080                 cmd_work->info.vxlan_port = ti->port;
5081                 queue_work(be_wq, &cmd_work->work);
5082         }
5083 }
5084
5085 static void be_del_vxlan_port(struct net_device *netdev,
5086                               struct udp_tunnel_info *ti)
5087 {
5088         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5089 }
5090
5091 static void be_add_vxlan_port(struct net_device *netdev,
5092                               struct udp_tunnel_info *ti)
5093 {
5094         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5095 }
5096
5097 static netdev_features_t be_features_check(struct sk_buff *skb,
5098                                            struct net_device *dev,
5099                                            netdev_features_t features)
5100 {
5101         struct be_adapter *adapter = netdev_priv(dev);
5102         u8 l4_hdr = 0;
5103
5104         if (skb_is_gso(skb)) {
5105                 /* IPv6 TSO requests with extension hdrs are a problem
5106                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5107                  */
5108                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5109                         features &= ~NETIF_F_TSO6;
5110
5111                 /* Lancer cannot handle packets with an MSS less than 256.
5112                  * Disable GSO support in such cases.
5113                  */
5114                 if (lancer_chip(adapter) && skb_shinfo(skb)->gso_size < 256)
5115                         features &= ~NETIF_F_GSO_MASK;
5116         }
5117
5118         /* The code below restricts offload features for some tunneled and
5119          * Q-in-Q packets.
5120          * Offload features for normal (non-tunnel) packets are unchanged.
5121          */
5122         features = vlan_features_check(skb, features);
5123         if (!skb->encapsulation ||
5124             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5125                 return features;
5126
5127         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5128          * should disable tunnel offload features if it's not a VxLAN packet,
5129          * as tunnel offloads have been enabled only for VxLAN. This is done to
5130          * allow other tunneled traffic, such as GRE, to work fine while VxLAN
5131          * offloads are configured in Skyhawk-R.
5132          */
5133         switch (vlan_get_protocol(skb)) {
5134         case htons(ETH_P_IP):
5135                 l4_hdr = ip_hdr(skb)->protocol;
5136                 break;
5137         case htons(ETH_P_IPV6):
5138                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5139                 break;
5140         default:
5141                 return features;
5142         }
5143
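	/* Keep checksum/TSO offloads only for the flow that was actually
	 * offloaded: the outer L4 protocol must be UDP, the inner packet must
	 * be Ethernet (ETH_P_TEB), the inner MAC header must sit exactly one
	 * UDP header plus one VxLAN header past the transport header, and the
	 * destination port must match the configured vxlan_port.
	 */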
5144         if (l4_hdr != IPPROTO_UDP ||
5145             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5146             skb->inner_protocol != htons(ETH_P_TEB) ||
5147             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5148                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5149             !adapter->vxlan_port ||
5150             udp_hdr(skb)->dest != adapter->vxlan_port)
5151                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5152
5153         return features;
5154 }
5155
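/* The physical port id built below is one byte of (hba_port_num + 1)
 * followed by the controller serial number words copied in reverse order,
 * CNTL_SERIAL_NUM_WORD_SZ bytes per word, for a total id_len of
 * CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1 bytes.
 */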
5156 static int be_get_phys_port_id(struct net_device *dev,
5157                                struct netdev_phys_item_id *ppid)
5158 {
5159         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5160         struct be_adapter *adapter = netdev_priv(dev);
5161         u8 *id;
5162
5163         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5164                 return -ENOSPC;
5165
5166         ppid->id[0] = adapter->hba_port_num + 1;
5167         id = &ppid->id[1];
5168         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5169              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5170                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5171
5172         ppid->id_len = id_len;
5173
5174         return 0;
5175 }
5176
5177 static void be_set_rx_mode(struct net_device *dev)
5178 {
5179         struct be_adapter *adapter = netdev_priv(dev);
5180         struct be_cmd_work *work;
5181
5182         work = be_alloc_work(adapter, be_work_set_rx_mode);
5183         if (work)
5184                 queue_work(be_wq, &work->work);
5185 }
5186
5187 static const struct net_device_ops be_netdev_ops = {
5188         .ndo_open               = be_open,
5189         .ndo_stop               = be_close,
5190         .ndo_start_xmit         = be_xmit,
5191         .ndo_set_rx_mode        = be_set_rx_mode,
5192         .ndo_set_mac_address    = be_mac_addr_set,
5193         .ndo_get_stats64        = be_get_stats64,
5194         .ndo_validate_addr      = eth_validate_addr,
5195         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5196         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5197         .ndo_set_vf_mac         = be_set_vf_mac,
5198         .ndo_set_vf_vlan        = be_set_vf_vlan,
5199         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5200         .ndo_get_vf_config      = be_get_vf_config,
5201         .ndo_set_vf_link_state  = be_set_vf_link_state,
5202         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5203 #ifdef CONFIG_NET_POLL_CONTROLLER
5204         .ndo_poll_controller    = be_netpoll,
5205 #endif
5206         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5207         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5208         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5209         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5210         .ndo_features_check     = be_features_check,
5211         .ndo_get_phys_port_id   = be_get_phys_port_id,
5212 };
5213
5214 static void be_netdev_init(struct net_device *netdev)
5215 {
5216         struct be_adapter *adapter = netdev_priv(netdev);
5217
5218         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5219                 NETIF_F_GSO_UDP_TUNNEL |
5220                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5221                 NETIF_F_HW_VLAN_CTAG_TX;
5222         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5223                 netdev->hw_features |= NETIF_F_RXHASH;
5224
5225         netdev->features |= netdev->hw_features |
5226                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5227
5228         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5229                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5230
5231         netdev->priv_flags |= IFF_UNICAST_FLT;
5232
5233         netdev->flags |= IFF_MULTICAST;
5234
5235         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5236
5237         netdev->netdev_ops = &be_netdev_ops;
5238
5239         netdev->ethtool_ops = &be_ethtool_ops;
5240
5241         /* MTU range: 256 - 9000 */
5242         netdev->min_mtu = BE_MIN_MTU;
5243         netdev->max_mtu = BE_MAX_MTU;
5244 }
5245
5246 static void be_cleanup(struct be_adapter *adapter)
5247 {
5248         struct net_device *netdev = adapter->netdev;
5249
5250         rtnl_lock();
5251         netif_device_detach(netdev);
5252         if (netif_running(netdev))
5253                 be_close(netdev);
5254         rtnl_unlock();
5255
5256         be_clear(adapter);
5257 }
5258
5259 static int be_resume(struct be_adapter *adapter)
5260 {
5261         struct net_device *netdev = adapter->netdev;
5262         int status;
5263
5264         status = be_setup(adapter);
5265         if (status)
5266                 return status;
5267
5268         rtnl_lock();
5269         if (netif_running(netdev))
5270                 status = be_open(netdev);
5271         rtnl_unlock();
5272
5273         if (status)
5274                 return status;
5275
5276         netif_device_attach(netdev);
5277
5278         return 0;
5279 }
5280
5281 static void be_soft_reset(struct be_adapter *adapter)
5282 {
5283         u32 val;
5284
5285         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5286         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5287         val |= SLIPORT_SOFTRESET_SR_MASK;
5288         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5289 }
5290
5291 static bool be_err_is_recoverable(struct be_adapter *adapter)
5292 {
5293         struct be_error_recovery *err_rec = &adapter->error_recovery;
5294         unsigned long initial_idle_time =
5295                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5296         unsigned long recovery_interval =
5297                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5298         u16 ue_err_code;
5299         u32 val;
5300
5301         val = be_POST_stage_get(adapter);
5302         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5303                 return false;
5304         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5305         if (ue_err_code == 0)
5306                 return false;
5307
5308         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5309                 ue_err_code);
5310
5311         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5312                 dev_err(&adapter->pdev->dev,
5313                         "Cannot recover within %lu sec from driver load\n",
5314                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5315                 return false;
5316         }
5317
5318         if (err_rec->last_recovery_time && time_before_eq(
5319                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5320                 dev_err(&adapter->pdev->dev,
5321                         "Cannot recover within %lu sec from last recovery\n",
5322                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5323                 return false;
5324         }
5325
5326         if (ue_err_code == err_rec->last_err_code) {
5327                 dev_err(&adapter->pdev->dev,
5328                         "Cannot recover from a consecutive TPE error\n");
5329                 return false;
5330         }
5331
5332         err_rec->last_recovery_time = jiffies;
5333         err_rec->last_err_code = ue_err_code;
5334         return true;
5335 }
5336
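/* The TPE recovery below is a small state machine driven by the error
 * detection worker: NONE -> DETECT (wait for the UE detect duration), then
 * DETECT -> RESET -> PRE_POLL on PF0 (which performs the chip soft reset)
 * or DETECT -> PRE_POLL on all other PFs, and finally PRE_POLL -> REINIT
 * with status 0, after which the caller proceeds with the actual
 * re-initialization.
 */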
5337 static int be_tpe_recover(struct be_adapter *adapter)
5338 {
5339         struct be_error_recovery *err_rec = &adapter->error_recovery;
5340         int status = -EAGAIN;
5341         u32 val;
5342
5343         switch (err_rec->recovery_state) {
5344         case ERR_RECOVERY_ST_NONE:
5345                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5346                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5347                 break;
5348
5349         case ERR_RECOVERY_ST_DETECT:
5350                 val = be_POST_stage_get(adapter);
5351                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5352                     POST_STAGE_RECOVERABLE_ERR) {
5353                         dev_err(&adapter->pdev->dev,
5354                                 "Unrecoverable HW error detected: 0x%x\n", val);
5355                         status = -EINVAL;
5356                         err_rec->resched_delay = 0;
5357                         break;
5358                 }
5359
5360                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5361
5362                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5363                  * milliseconds before it checks for final error status in
5364                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5365                  * If they are, then PF0 initiates a Soft Reset.
5366                  */
5367                 if (adapter->pf_num == 0) {
5368                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5369                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5370                                         ERR_RECOVERY_UE_DETECT_DURATION;
5371                         break;
5372                 }
5373
5374                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5375                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5376                                         ERR_RECOVERY_UE_DETECT_DURATION;
5377                 break;
5378
5379         case ERR_RECOVERY_ST_RESET:
5380                 if (!be_err_is_recoverable(adapter)) {
5381                         dev_err(&adapter->pdev->dev,
5382                                 "Failed to meet recovery criteria\n");
5383                         status = -EIO;
5384                         err_rec->resched_delay = 0;
5385                         break;
5386                 }
5387                 be_soft_reset(adapter);
5388                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5389                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5390                                         err_rec->ue_to_reset_time;
5391                 break;
5392
5393         case ERR_RECOVERY_ST_PRE_POLL:
5394                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5395                 err_rec->resched_delay = 0;
5396                 status = 0;                     /* done */
5397                 break;
5398
5399         default:
5400                 status = -EINVAL;
5401                 err_rec->resched_delay = 0;
5402                 break;
5403         }
5404
5405         return status;
5406 }
5407
5408 static int be_err_recover(struct be_adapter *adapter)
5409 {
5410         int status;
5411
5412         if (!lancer_chip(adapter)) {
5413                 if (!adapter->error_recovery.recovery_supported ||
5414                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5415                         return -EIO;
5416                 status = be_tpe_recover(adapter);
5417                 if (status)
5418                         goto err;
5419         }
5420
5421         /* Wait for adapter to reach quiescent state before
5422          * destroying queues
5423          */
5424         status = be_fw_wait_ready(adapter);
5425         if (status)
5426                 goto err;
5427
5428         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5429
5430         be_cleanup(adapter);
5431
5432         status = be_resume(adapter);
5433         if (status)
5434                 goto err;
5435
5436         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5437
5438 err:
5439         return status;
5440 }
5441
5442 static void be_err_detection_task(struct work_struct *work)
5443 {
5444         struct be_error_recovery *err_rec =
5445                         container_of(work, struct be_error_recovery,
5446                                      err_detection_work.work);
5447         struct be_adapter *adapter =
5448                         container_of(err_rec, struct be_adapter,
5449                                      error_recovery);
5450         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5451         struct device *dev = &adapter->pdev->dev;
5452         int recovery_status;
5453
5454         be_detect_error(adapter);
5455         if (!be_check_error(adapter, BE_ERROR_HW))
5456                 goto reschedule_task;
5457
5458         recovery_status = be_err_recover(adapter);
5459         if (!recovery_status) {
5460                 err_rec->recovery_retries = 0;
5461                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5462                 dev_info(dev, "Adapter recovery successful\n");
5463                 goto reschedule_task;
5464         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5465                 /* BEx/SH recovery state machine */
5466                 if (adapter->pf_num == 0 &&
5467                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5468                         dev_err(&adapter->pdev->dev,
5469                                 "Adapter recovery in progress\n");
5470                 resched_delay = err_rec->resched_delay;
5471                 goto reschedule_task;
5472         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5473                 /* For VFs, check if the PF has allocated resources
5474                  * every second.
5475                  */
5476                 dev_err(dev, "Re-trying adapter recovery\n");
5477                 goto reschedule_task;
5478         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5479                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5480                 /* In case of another error during recovery, it takes 30 sec
5481                  * for the adapter to come out of error. Retry error recovery after
5482                  * this time interval.
5483                  */
5484                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5485                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5486                 goto reschedule_task;
5487         } else {
5488                 dev_err(dev, "Adapter recovery failed\n");
5489                 dev_err(dev, "Please reboot server to recover\n");
5490         }
5491
5492         return;
5493
5494 reschedule_task:
5495         be_schedule_err_detection(adapter, resched_delay);
5496 }
5497
5498 static void be_log_sfp_info(struct be_adapter *adapter)
5499 {
5500         int status;
5501
5502         status = be_cmd_query_sfp_info(adapter);
5503         if (!status) {
5504                 dev_err(&adapter->pdev->dev,
5505                         "Port %c: %s Vendor: %s part no: %s",
5506                         adapter->port_name,
5507                         be_misconfig_evt_port_state[adapter->phy_state],
5508                         adapter->phy.vendor_name,
5509                         adapter->phy.vendor_pn);
5510         }
5511         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5512 }
5513
5514 static void be_worker(struct work_struct *work)
5515 {
5516         struct be_adapter *adapter =
5517                 container_of(work, struct be_adapter, work.work);
5518         struct be_rx_obj *rxo;
5519         int i;
5520
5521         if (be_physfn(adapter) &&
5522             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5523                 be_cmd_get_die_temperature(adapter);
5524
5525         /* When interrupts are not yet enabled, just reap any pending
5526          * MCC completions.
5527          */
5528         if (!netif_running(adapter->netdev)) {
5529                 local_bh_disable();
5530                 be_process_mcc(adapter);
5531                 local_bh_enable();
5532                 goto reschedule;
5533         }
5534
5535         if (!adapter->stats_cmd_sent) {
5536                 if (lancer_chip(adapter))
5537                         lancer_cmd_get_pport_stats(adapter,
5538                                                    &adapter->stats_cmd);
5539                 else
5540                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5541         }
5542
5543         for_all_rx_queues(adapter, rxo, i) {
5544                 /* Replenish RX-queues starved due to memory
5545                  * allocation failures.
5546                  */
5547                 if (rxo->rx_post_starved)
5548                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5549         }
5550
5551         /* EQ-delay update for Skyhawk is done while notifying EQ */
5552         if (!skyhawk_chip(adapter))
5553                 be_eqd_update(adapter, false);
5554
5555         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5556                 be_log_sfp_info(adapter);
5557
5558 reschedule:
5559         adapter->work_counter++;
5560         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5561 }
5562
5563 static void be_unmap_pci_bars(struct be_adapter *adapter)
5564 {
5565         if (adapter->csr)
5566                 pci_iounmap(adapter->pdev, adapter->csr);
5567         if (adapter->db)
5568                 pci_iounmap(adapter->pdev, adapter->db);
5569         if (adapter->pcicfg && adapter->pcicfg_mapped)
5570                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5571 }
5572
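/* Doorbell BAR selection: Lancer and virtual functions expose the doorbell
 * region in BAR 0, while BE2/BE3/Skyhawk physical functions use BAR 4.
 */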
5573 static int db_bar(struct be_adapter *adapter)
5574 {
5575         if (lancer_chip(adapter) || be_virtfn(adapter))
5576                 return 0;
5577         else
5578                 return 4;
5579 }
5580
5581 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5582 {
5583         if (skyhawk_chip(adapter)) {
5584                 adapter->roce_db.size = 4096;
5585                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5586                                                               db_bar(adapter));
5587                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5588                                                                db_bar(adapter));
5589         }
5590         return 0;
5591 }
5592
5593 static int be_map_pci_bars(struct be_adapter *adapter)
5594 {
5595         struct pci_dev *pdev = adapter->pdev;
5596         u8 __iomem *addr;
5597         u32 sli_intf;
5598
5599         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5600         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5601                                 SLI_INTF_FAMILY_SHIFT;
5602         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5603
5604         if (BEx_chip(adapter) && be_physfn(adapter)) {
5605                 adapter->csr = pci_iomap(pdev, 2, 0);
5606                 if (!adapter->csr)
5607                         return -ENOMEM;
5608         }
5609
5610         addr = pci_iomap(pdev, db_bar(adapter), 0);
5611         if (!addr)
5612                 goto pci_map_err;
5613         adapter->db = addr;
5614
5615         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5616                 if (be_physfn(adapter)) {
5617                         /* PCICFG is the 2nd BAR in BE2 */
5618                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5619                         if (!addr)
5620                                 goto pci_map_err;
5621                         adapter->pcicfg = addr;
5622                         adapter->pcicfg_mapped = true;
5623                 } else {
5624                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5625                         adapter->pcicfg_mapped = false;
5626                 }
5627         }
5628
5629         be_roce_map_pci_bars(adapter);
5630         return 0;
5631
5632 pci_map_err:
5633         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5634         be_unmap_pci_bars(adapter);
5635         return -ENOMEM;
5636 }
5637
5638 static void be_drv_cleanup(struct be_adapter *adapter)
5639 {
5640         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5641         struct device *dev = &adapter->pdev->dev;
5642
5643         if (mem->va)
5644                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5645
5646         mem = &adapter->rx_filter;
5647         if (mem->va)
5648                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5649
5650         mem = &adapter->stats_cmd;
5651         if (mem->va)
5652                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5653 }
5654
5655 /* Allocate and initialize various fields in be_adapter struct */
5656 static int be_drv_init(struct be_adapter *adapter)
5657 {
5658         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5659         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5660         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5661         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5662         struct device *dev = &adapter->pdev->dev;
5663         int status = 0;
5664
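	/* The mailbox buffer is over-allocated by 16 bytes so that the
	 * va/dma pointers can be rounded up to a 16-byte boundary below
	 * while still leaving room for the full struct be_mcc_mailbox.
	 */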
5665         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5666         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5667                                                  &mbox_mem_alloc->dma,
5668                                                  GFP_KERNEL);
5669         if (!mbox_mem_alloc->va)
5670                 return -ENOMEM;
5671
5672         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5673         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5674         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5675
5676         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5677         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5678                                             &rx_filter->dma, GFP_KERNEL);
5679         if (!rx_filter->va) {
5680                 status = -ENOMEM;
5681                 goto free_mbox;
5682         }
5683
5684         if (lancer_chip(adapter))
5685                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5686         else if (BE2_chip(adapter))
5687                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5688         else if (BE3_chip(adapter))
5689                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5690         else
5691                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5692         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5693                                             &stats_cmd->dma, GFP_KERNEL);
5694         if (!stats_cmd->va) {
5695                 status = -ENOMEM;
5696                 goto free_rx_filter;
5697         }
5698
5699         mutex_init(&adapter->mbox_lock);
5700         mutex_init(&adapter->mcc_lock);
5701         mutex_init(&adapter->rx_filter_lock);
5702         spin_lock_init(&adapter->mcc_cq_lock);
5703         init_completion(&adapter->et_cmd_compl);
5704
5705         pci_save_state(adapter->pdev);
5706
5707         INIT_DELAYED_WORK(&adapter->work, be_worker);
5708
5709         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5710         adapter->error_recovery.resched_delay = 0;
5711         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5712                           be_err_detection_task);
5713
5714         adapter->rx_fc = true;
5715         adapter->tx_fc = true;
5716
5717         /* Must be a power of 2 or else MODULO will BUG_ON */
5718         adapter->be_get_temp_freq = 64;
5719
5720         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5721         return 0;
5722
5723 free_rx_filter:
5724         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5725 free_mbox:
5726         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5727                           mbox_mem_alloc->dma);
5728         return status;
5729 }
5730
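/* PCI remove callback: tear down in roughly the reverse order of probe --
 * detach RoCE, disable interrupts, stop error detection, unregister the
 * netdev, clear HW/SW resources, reset the function (skipped while VFs are
 * still assigned to guests), notify FW, unmap BARs, free DMA buffers and
 * release the PCI device.
 */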
5731 static void be_remove(struct pci_dev *pdev)
5732 {
5733         struct be_adapter *adapter = pci_get_drvdata(pdev);
5734
5735         if (!adapter)
5736                 return;
5737
5738         be_roce_dev_remove(adapter);
5739         be_intr_set(adapter, false);
5740
5741         be_cancel_err_detection(adapter);
5742
5743         unregister_netdev(adapter->netdev);
5744
5745         be_clear(adapter);
5746
5747         if (!pci_vfs_assigned(adapter->pdev))
5748                 be_cmd_reset_function(adapter);
5749
5750         /* Tell FW we're done issuing cmds */
5751         be_cmd_fw_clean(adapter);
5752
5753         be_unmap_pci_bars(adapter);
5754         be_drv_cleanup(adapter);
5755
5756         pci_disable_pcie_error_reporting(pdev);
5757
5758         pci_release_regions(pdev);
5759         pci_disable_device(pdev);
5760
5761         free_netdev(adapter->netdev);
5762 }
5763
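/* hwmon read callback for temp1_input.  Reports the cached on-die
 * temperature in millidegrees Celsius, or -EIO while no valid reading is
 * available.  With hwmon enabled this typically appears as
 * /sys/class/hwmon/hwmonN/temp1_input (the exact path depends on how the
 * hwmon core enumerates devices).
 */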
5764 static ssize_t be_hwmon_show_temp(struct device *dev,
5765                                   struct device_attribute *dev_attr,
5766                                   char *buf)
5767 {
5768         struct be_adapter *adapter = dev_get_drvdata(dev);
5769
5770         /* Unit: millidegree Celsius */
5771         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5772                 return -EIO;
5773         else
5774                 return sprintf(buf, "%u\n",
5775                                adapter->hwmon_info.be_on_die_temp * 1000);
5776 }
5777
5778 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5779                           be_hwmon_show_temp, NULL, 1);
5780
5781 static struct attribute *be_hwmon_attrs[] = {
5782         &sensor_dev_attr_temp1_input.dev_attr.attr,
5783         NULL
5784 };
5785
5786 ATTRIBUTE_GROUPS(be_hwmon);
5787
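/* Return a human-readable label for the adapter's multi-channel mode; used
 * only in the probe-time banner.  An empty string is returned when the
 * function is not in a recognized multi-channel configuration.
 */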
5788 static char *mc_name(struct be_adapter *adapter)
5789 {
5790         char *str = ""; /* default */
5791
5792         switch (adapter->mc_type) {
5793         case UMC:
5794                 str = "UMC";
5795                 break;
5796         case FLEX10:
5797                 str = "FLEX10";
5798                 break;
5799         case vNIC1:
5800                 str = "vNIC-1";
5801                 break;
5802         case nPAR:
5803                 str = "nPAR";
5804                 break;
5805         case UFP:
5806                 str = "UFP";
5807                 break;
5808         case vNIC2:
5809                 str = "vNIC-2";
5810                 break;
5811         default:
5812                 str = "";
5813         }
5814
5815         return str;
5816 }
5817
5818 static inline char *func_name(struct be_adapter *adapter)
5819 {
5820         return be_physfn(adapter) ? "PF" : "VF";
5821 }
5822
5823 static inline char *nic_name(struct pci_dev *pdev)
5824 {
5825         switch (pdev->device) {
5826         case OC_DEVICE_ID1:
5827                 return OC_NAME;
5828         case OC_DEVICE_ID2:
5829                 return OC_NAME_BE;
5830         case OC_DEVICE_ID3:
5831         case OC_DEVICE_ID4:
5832                 return OC_NAME_LANCER;
5833         case BE_DEVICE_ID2:
5834                 return BE3_NAME;
5835         case OC_DEVICE_ID5:
5836         case OC_DEVICE_ID6:
5837                 return OC_NAME_SH;
5838         default:
5839                 return BE_NAME;
5840         }
5841 }
5842
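/* PCI probe callback.  Sequence: enable the device and claim its regions,
 * allocate the net_device, set up DMA masks, enable AER, map BARs, allocate
 * driver-private DMA buffers (be_drv_init), bring up the adapter (be_setup),
 * register the netdev, attach RoCE, arm the error-detection work and, on
 * PFs, register the hwmon temperature sensor.
 */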
5843 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5844 {
5845         struct be_adapter *adapter;
5846         struct net_device *netdev;
5847         int status = 0;
5848
5849         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5850
5851         status = pci_enable_device(pdev);
5852         if (status)
5853                 goto do_none;
5854
5855         status = pci_request_regions(pdev, DRV_NAME);
5856         if (status)
5857                 goto disable_dev;
5858         pci_set_master(pdev);
5859
5860         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5861         if (!netdev) {
5862                 status = -ENOMEM;
5863                 goto rel_reg;
5864         }
5865         adapter = netdev_priv(netdev);
5866         adapter->pdev = pdev;
5867         pci_set_drvdata(pdev, adapter);
5868         adapter->netdev = netdev;
5869         SET_NETDEV_DEV(netdev, &pdev->dev);
5870
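        /* Prefer a 64-bit DMA mask; advertise NETIF_F_HIGHDMA only when it
         * is accepted, otherwise fall back to 32-bit DMA or fail the probe.
         */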
5871         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5872         if (!status) {
5873                 netdev->features |= NETIF_F_HIGHDMA;
5874         } else {
5875                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5876                 if (status) {
5877                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5878                         goto free_netdev;
5879                 }
5880         }
5881
5882         status = pci_enable_pcie_error_reporting(pdev);
5883         if (!status)
5884                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5885
5886         status = be_map_pci_bars(adapter);
5887         if (status)
5888                 goto free_netdev;
5889
5890         status = be_drv_init(adapter);
5891         if (status)
5892                 goto unmap_bars;
5893
5894         status = be_setup(adapter);
5895         if (status)
5896                 goto drv_cleanup;
5897
5898         be_netdev_init(netdev);
5899         status = register_netdev(netdev);
5900         if (status != 0)
5901                 goto unsetup;
5902
5903         be_roce_dev_add(adapter);
5904
5905         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5906         adapter->error_recovery.probe_time = jiffies;
5907
5908         /* On-die temperature is not supported on VFs. */
5909         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5910                 adapter->hwmon_info.hwmon_dev =
5911                         devm_hwmon_device_register_with_groups(&pdev->dev,
5912                                                                DRV_NAME,
5913                                                                adapter,
5914                                                                be_hwmon_groups);
5915                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5916         }
5917
5918         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5919                  func_name(adapter), mc_name(adapter), adapter->port_name);
5920
5921         return 0;
5922
5923 unsetup:
5924         be_clear(adapter);
5925 drv_cleanup:
5926         be_drv_cleanup(adapter);
5927 unmap_bars:
5928         be_unmap_pci_bars(adapter);
5929 free_netdev:
5930         pci_disable_pcie_error_reporting(pdev);
5931         free_netdev(netdev);
5932 rel_reg:
5933         pci_release_regions(pdev);
5934 disable_dev:
5935         pci_disable_device(pdev);
5936 do_none:
5937         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5938         return status;
5939 }
5940
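/* Legacy PCI power-management callback (.suspend): quiesce the adapter and
 * put the device into the requested low-power state.
 */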
5941 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5942 {
5943         struct be_adapter *adapter = pci_get_drvdata(pdev);
5944
5945         be_intr_set(adapter, false);
5946         be_cancel_err_detection(adapter);
5947
5948         be_cleanup(adapter);
5949
5950         pci_save_state(pdev);
5951         pci_disable_device(pdev);
5952         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5953         return 0;
5954 }
5955
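/* Counterpart of be_suspend(): re-enable the device, restore its PCI state,
 * re-initialize the adapter via be_resume() and re-arm error detection.
 */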
5956 static int be_pci_resume(struct pci_dev *pdev)
5957 {
5958         struct be_adapter *adapter = pci_get_drvdata(pdev);
5959         int status = 0;
5960
5961         status = pci_enable_device(pdev);
5962         if (status)
5963                 return status;
5964
5965         pci_restore_state(pdev);
5966
5967         status = be_resume(adapter);
5968         if (status)
5969                 return status;
5970
5971         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5972
5973         return 0;
5974 }
5975
5976 /*
5977  * A function-level reset (FLR) will stop BE from DMAing any data.
5978  */
5979 static void be_shutdown(struct pci_dev *pdev)
5980 {
5981         struct be_adapter *adapter = pci_get_drvdata(pdev);
5982
5983         if (!adapter)
5984                 return;
5985
5986         be_roce_dev_shutdown(adapter);
5987         cancel_delayed_work_sync(&adapter->work);
5988         be_cancel_err_detection(adapter);
5989
5990         netif_device_detach(adapter->netdev);
5991
5992         be_cmd_reset_function(adapter);
5993
5994         pci_disable_device(pdev);
5995 }
5996
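/* PCI error recovery (EEH/AER) callbacks: the core invokes .error_detected,
 * then .slot_reset and finally .resume (see be_eeh_handlers below).  On the
 * first report the driver flags BE_ERROR_EEH and tears down the data path
 * before the slot is reset.
 */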
5997 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5998                                             pci_channel_state_t state)
5999 {
6000         struct be_adapter *adapter = pci_get_drvdata(pdev);
6001
6002         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6003
6004         be_roce_dev_remove(adapter);
6005
6006         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6007                 be_set_error(adapter, BE_ERROR_EEH);
6008
6009                 be_cancel_err_detection(adapter);
6010
6011                 be_cleanup(adapter);
6012         }
6013
6014         if (state == pci_channel_io_perm_failure)
6015                 return PCI_ERS_RESULT_DISCONNECT;
6016
6017         pci_disable_device(pdev);
6018
6019         /* The error could cause the FW to trigger a flash debug dump.
6020          * Resetting the card while a flash dump is in progress
6021          * can prevent it from recovering, so wait for the dump to finish.
6022          * Only the first function needs to wait, since the dump happens
6023          * once per adapter.
6024          */
6025         if (pdev->devfn == 0)
6026                 ssleep(30);
6027
6028         return PCI_ERS_RESULT_NEED_RESET;
6029 }
6030
6031 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6032 {
6033         struct be_adapter *adapter = pci_get_drvdata(pdev);
6034         int status;
6035
6036         dev_info(&adapter->pdev->dev, "EEH reset\n");
6037
6038         status = pci_enable_device(pdev);
6039         if (status)
6040                 return PCI_ERS_RESULT_DISCONNECT;
6041
6042         pci_set_master(pdev);
6043         pci_restore_state(pdev);
6044
6045         /* Check if card is ok and fw is ready */
6046         dev_info(&adapter->pdev->dev,
6047                  "Waiting for FW to be ready after EEH reset\n");
6048         status = be_fw_wait_ready(adapter);
6049         if (status)
6050                 return PCI_ERS_RESULT_DISCONNECT;
6051
6052         pci_cleanup_aer_uncorrect_error_status(pdev);
6053         be_clear_error(adapter, BE_CLEAR_ALL);
6054         return PCI_ERS_RESULT_RECOVERED;
6055 }
6056
6057 static void be_eeh_resume(struct pci_dev *pdev)
6058 {
6059         int status = 0;
6060         struct be_adapter *adapter = pci_get_drvdata(pdev);
6061
6062         dev_info(&adapter->pdev->dev, "EEH resume\n");
6063
6064         pci_save_state(pdev);
6065
6066         status = be_resume(adapter);
6067         if (status)
6068                 goto err;
6069
6070         be_roce_dev_add(adapter);
6071
6072         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6073         return;
6074 err:
6075         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6076 }
6077
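/* Called by the PCI core when the admin writes to the sriov_numvfs sysfs
 * attribute, e.g. (illustrative path):
 *
 *   echo 2 > /sys/bus/pci/devices/0000:03:00.0/sriov_numvfs
 *
 * Writing 0 disables VFs (refused while VFs are assigned to guests).  On
 * Skyhawk, PF-pool resources are redistributed across the requested number
 * of VFs before the queues are updated and the VFs are set up.
 */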
6078 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6079 {
6080         struct be_adapter *adapter = pci_get_drvdata(pdev);
6081         struct be_resources vft_res = {0};
6082         int status;
6083
6084         if (!num_vfs)
6085                 be_vf_clear(adapter);
6086
6087         adapter->num_vfs = num_vfs;
6088
6089         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6090                 dev_warn(&pdev->dev,
6091                          "Cannot disable VFs while they are assigned\n");
6092                 return -EBUSY;
6093         }
6094
6095         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6096          * resources are distributed equally across the maximum number of VFs.
6097          * The user may request that only a subset of those VFs be enabled.
6098          * Based on num_vfs, redistribute the resources across num_vfs so that
6099          * each enabled VF gets a larger share of the resources.
6100          * This facility is not available in BE3 FW.
6101          * On Lancer chips this redistribution is done by the FW itself.
6102          */
6103         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6104                 be_calculate_vf_res(adapter, adapter->num_vfs,
6105                                     &vft_res);
6106                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6107                                                  adapter->num_vfs, &vft_res);
6108                 if (status)
6109                         dev_err(&pdev->dev,
6110                                 "Failed to optimize SR-IOV resources\n");
6111         }
6112
6113         status = be_get_resources(adapter);
6114         if (status)
6115                 return be_cmd_status(status);
6116
6117         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6118         rtnl_lock();
6119         status = be_update_queues(adapter);
6120         rtnl_unlock();
6121         if (status)
6122                 return be_cmd_status(status);
6123
6124         if (adapter->num_vfs)
6125                 status = be_vf_setup(adapter);
6126
6127         if (!status)
6128                 return adapter->num_vfs;
6129
6130         return 0;
6131 }
6132
6133 static const struct pci_error_handlers be_eeh_handlers = {
6134         .error_detected = be_eeh_err_detected,
6135         .slot_reset = be_eeh_reset,
6136         .resume = be_eeh_resume,
6137 };
6138
6139 static struct pci_driver be_driver = {
6140         .name = DRV_NAME,
6141         .id_table = be_dev_ids,
6142         .probe = be_probe,
6143         .remove = be_remove,
6144         .suspend = be_suspend,
6145         .resume = be_pci_resume,
6146         .shutdown = be_shutdown,
6147         .sriov_configure = be_pci_sriov_configure,
6148         .err_handler = &be_eeh_handlers
6149 };
6150
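/* Module init: validate the rx_frag_size parameter, warn about the obsolete
 * num_vfs parameter, create the shared command and error-recovery
 * workqueues, and register the PCI driver.
 */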
6151 static int __init be_init_module(void)
6152 {
6153         int status;
6154
6155         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6156             rx_frag_size != 2048) {
6157                 printk(KERN_WARNING DRV_NAME
6158                         " : Module param rx_frag_size must be 2048/4096/8192."
6159                         " Using 2048\n");
6160                 rx_frag_size = 2048;
6161         }
6162
6163         if (num_vfs > 0) {
6164                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6165                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6166         }
6167
6168         be_wq = create_singlethread_workqueue("be_wq");
6169         if (!be_wq) {
6170                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6171                 return -ENOMEM;
6172         }
6173
6174         be_err_recovery_workq =
6175                 create_singlethread_workqueue("be_err_recover");
6176         if (!be_err_recovery_workq)
6177                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6178
6179         status = pci_register_driver(&be_driver);
6180         if (status) {
6181                 destroy_workqueue(be_wq);
6182                 be_destroy_err_recovery_workq();
6183         }
6184         return status;
6185 }
6186 module_init(be_init_module);
6187
6188 static void __exit be_exit_module(void)
6189 {
6190         pci_unregister_driver(&be_driver);
6191
6192         be_destroy_err_recovery_workq();
6193
6194         if (be_wq)
6195                 destroy_workqueue(be_wq);
6196 }
6197 module_exit(be_exit_module);