/* Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/ip.h>
#include <linux/etherdevice.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/tso.h>

#include "nic_reg.h"
#include "nic.h"
#include "q_struct.h"
#include "nicvf_queues.h"

static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
					       int size, u64 data);

static void nicvf_get_page(struct nicvf *nic)
{
	if (!nic->rb_pageref || !nic->rb_page)
		return;

	page_ref_add(nic->rb_page, nic->rb_pageref);
	nic->rb_pageref = 0;
}

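/* Note: nicvf_get_page() is the flush side of a batching scheme. Callers
 * bump nic->rb_pageref once per receive buffer carved out of the current
 * page, and all of those references are applied with a single atomic
 * page_ref_add() above, instead of one atomic op per buffer.
 */
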
/* Poll a register for a specific value */
static int nicvf_poll_reg(struct nicvf *nic, int qidx,
			  u64 reg, int bit_pos, int bits, int val)
{
	u64 bit_mask;
	u64 reg_val;
	int timeout = 10;

	bit_mask = (1ULL << bits) - 1;
	bit_mask = (bit_mask << bit_pos);

	while (timeout) {
		reg_val = nicvf_queue_reg_read(nic, reg, qidx);
		if (((reg_val & bit_mask) >> bit_pos) == val)
			return 0;
		usleep_range(1000, 2000);
		timeout--;
	}
	netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg);
	return 1;
}

/* Allocate memory for a queue's descriptors */
static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem,
				  int q_len, int desc_size, int align_bytes)
{
	dmem->q_len = q_len;
	dmem->size = (desc_size * q_len) + align_bytes;
	/* Save address, need it while freeing */
	dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size,
						 &dmem->dma, GFP_KERNEL);
	if (!dmem->unalign_base)
		return -ENOMEM;

	/* Align memory address for 'align_bytes' */
	dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes);
	dmem->base = dmem->unalign_base + (dmem->phys_base - dmem->dma);
	return 0;
}

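/* Alignment math, by example: with align_bytes = 128 the allocation is
 * padded by 128 bytes of slack, NICVF_ALIGNED_ADDR() rounds the DMA handle
 * up to the next 128-byte boundary, and 'base' is advanced by the same
 * delta, so the CPU and device views of the ring stay consistent.
 */
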
/* Free queue's descriptor memory */
static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem)
{
	if (!dmem)
		return;

	dma_free_coherent(&nic->pdev->dev, dmem->size,
			  dmem->unalign_base, dmem->dma);
	dmem->unalign_base = NULL;
	dmem->base = NULL;
}

#define XDP_PAGE_REFCNT_REFILL 256

/* Allocate a new page or recycle one if possible
 *
 * We cannot optimize dma mapping here, since
 * 1. It's only one RBDR ring for 8 Rx queues.
 * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
 *    and not idx into RBDR ring, so can't refer to saved info.
 * 3. There are multiple receive buffers per page
 */
static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic,
					       struct rbdr *rbdr, gfp_t gfp)
{
	int ref_count;
	struct page *page = NULL;
	struct pgcache *pgcache, *next;

	/* Check if page is already allocated */
	pgcache = &rbdr->pgcache[rbdr->pgidx];
	page = pgcache->page;
	/* Check if page can be recycled */
	if (page) {
		ref_count = page_ref_count(page);
		/* This page can be recycled if internal ref_count and page's
		 * ref_count are equal, indicating that the page has been used
		 * once for packet transmission. For non-XDP mode, internal
		 * ref_count is always '1'.
		 */
		if (rbdr->is_xdp) {
			if (ref_count == pgcache->ref_count)
				pgcache->ref_count--;
			else
				page = NULL;
		} else if (ref_count != 1) {
			page = NULL;
		}
	}

	if (!page) {
		page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0);
		if (!page)
			return NULL;

		this_cpu_inc(nic->pnicvf->drv_stats->page_alloc);

		/* Check for space */
		if (rbdr->pgalloc >= rbdr->pgcnt) {
			/* Page can still be used */
			nic->rb_page = page;
			return NULL;
		}

		/* Save the page in page cache */
		pgcache->page = page;
		pgcache->dma_addr = 0;
		pgcache->ref_count = 0;
		rbdr->pgalloc++;
	}

	/* Take additional page references for recycling */
	if (rbdr->is_xdp) {
		/* Since there is single RBDR (i.e. single core doing
		 * page recycling) per 8 Rx queues, in XDP mode adjusting
		 * page references atomically is the biggest bottleneck, so
		 * take a bunch of references at a time.
		 *
		 * So here, the reference counts below differ by '1'.
		 */
		if (!pgcache->ref_count) {
			pgcache->ref_count = XDP_PAGE_REFCNT_REFILL;
			page_ref_add(page, XDP_PAGE_REFCNT_REFILL);
		}
	} else {
		/* In non-XDP case, single 64K page is divided across multiple
		 * receive buffers, so cost of recycling is less anyway.
		 * So we can do with just one extra reference.
		 */
		page_ref_add(page, 1);
	}

	rbdr->pgidx++;
	rbdr->pgidx &= (rbdr->pgcnt - 1);

	/* Prefetch refcount of next page in page cache */
	next = &rbdr->pgcache[rbdr->pgidx];
	page = next->page;
	if (page)
		prefetch(&page->_refcount);

	return pgcache;
}

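/* Recycling rules, in short: in XDP mode a page is reusable when its kernel
 * refcount equals the cached 'ref_count' (all buffers handed out from it
 * have been returned); in non-XDP mode only when its refcount has dropped
 * back to 1. Both checks are safe without locking because a single core
 * services the RBDR.
 */
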
/* Allocate buffer for packet reception */
static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr,
					 gfp_t gfp, u32 buf_len, u64 *rbuf)
{
	struct pgcache *pgcache = NULL;

	/* Check if request can be accommodated in previously allocated page.
	 * But in XDP mode only one buffer per page is permitted.
	 */
	if (!rbdr->is_xdp && nic->rb_page &&
	    ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) {
		nic->rb_pageref++;
		goto ret;
	}

	nicvf_get_page(nic);
	nic->rb_page = NULL;

	/* Get new page, either recycled or new one */
	pgcache = nicvf_alloc_page(nic, rbdr, gfp);
	if (!pgcache && !nic->rb_page) {
		this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures);
		return -ENOMEM;
	}

	nic->rb_page_offset = 0;

	/* Reserve space for header modifications by BPF program */
	if (rbdr->is_xdp)
		buf_len += XDP_PACKET_HEADROOM;

	/* Check if it's recycled */
	if (pgcache)
		nic->rb_page = pgcache->page;
ret:
	if (rbdr->is_xdp && pgcache && pgcache->dma_addr) {
		*rbuf = pgcache->dma_addr;
	} else {
		/* HW will ensure data coherency, CPU sync not required */
		*rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page,
						nic->rb_page_offset, buf_len,
						DMA_FROM_DEVICE,
						DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) {
			if (!nic->rb_page_offset)
				__free_pages(nic->rb_page, 0);
			nic->rb_page = NULL;
			return -ENOMEM;
		}
		if (pgcache)
			pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM;
		nic->rb_page_offset += buf_len;
	}

	return 0;
}

/* Build skb around receive buffer */
static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic,
					   u64 rb_ptr, int len)
{
	void *data;
	struct sk_buff *skb;

	data = phys_to_virt(rb_ptr);

	/* Now build an skb to give to stack */
	skb = build_skb(data, RCV_FRAG_LEN);
	if (!skb) {
		put_page(virt_to_page(data));
		return NULL;
	}

	return skb;
}

/* Allocate RBDR ring and populate receive buffers */
static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr,
			   int ring_len, int buf_size)
{
	int idx;
	u64 rbuf;
	struct rbdr_entry_t *desc;
	int err;

	err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len,
				     sizeof(struct rbdr_entry_t),
				     NICVF_RCV_BUF_ALIGN_BYTES);
	if (err)
		return err;

	rbdr->desc = rbdr->dmem.base;
	/* Buffer size has to be in multiples of 128 bytes */
	rbdr->dma_size = buf_size;
	rbdr->enable = true;
	rbdr->thresh = RBDR_THRESH;
	rbdr->head = 0;
	rbdr->tail = 0;

	/* Initialize page recycling stuff.
	 *
	 * Can't use a single buffer per page, especially with 64K pages.
	 * On embedded platforms, i.e. 81xx/83xx, available memory itself
	 * is low and the minimum ring size of RBDR is 8K, that takes away
	 * lots of memory.
	 *
	 * But for XDP it has to be a single buffer per page.
	 */
	if (!nic->pnicvf->xdp_prog) {
		rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size);
		rbdr->is_xdp = false;
	} else {
		rbdr->pgcnt = ring_len;
		rbdr->is_xdp = true;
	}
	rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt);
	rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) *
				rbdr->pgcnt, GFP_KERNEL);
	if (!rbdr->pgcache)
		return -ENOMEM;
	rbdr->pgidx = 0;
	rbdr->pgalloc = 0;

	nic->rb_page = NULL;
	for (idx = 0; idx < ring_len; idx++) {
		err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL,
					     RCV_FRAG_LEN, &rbuf);
		if (err) {
			/* To free already allocated and mapped ones */
			rbdr->tail = idx - 1;
			return err;
		}

		desc = GET_RBDR_DESC(rbdr, idx);
		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
	}

	nicvf_get_page(nic);

	return 0;
}

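/* Page cache sizing, by example: with 4 KB pages, 2 KB buffers and
 * ring_len = 8192, the non-XDP cache holds 8192 / (4096 / 2048) = 4096
 * entries; in XDP mode (one buffer per page) it holds all 8192. Rounding
 * 'pgcnt' up to a power of two lets 'pgidx' wrap with a simple mask.
 */
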
/* Free RBDR ring and its receive buffers */
static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr)
{
	int head, tail;
	u64 buf_addr, phys_addr;
	struct pgcache *pgcache;
	struct rbdr_entry_t *desc;

	if (!rbdr)
		return;

	rbdr->enable = false;
	if (!rbdr->dmem.base)
		return;

	head = rbdr->head;
	tail = rbdr->tail;

	/* Release page references */
	while (head != tail) {
		desc = GET_RBDR_DESC(rbdr, head);
		buf_addr = desc->buf_addr;
		phys_addr = nicvf_iova_to_phys(nic, buf_addr);
		dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
				     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
		if (phys_addr)
			put_page(virt_to_page(phys_to_virt(phys_addr)));
		head++;
		head &= (rbdr->dmem.q_len - 1);
	}
	/* Release buffer of tail desc */
	desc = GET_RBDR_DESC(rbdr, tail);
	buf_addr = desc->buf_addr;
	phys_addr = nicvf_iova_to_phys(nic, buf_addr);
	dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN,
			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (phys_addr)
		put_page(virt_to_page(phys_to_virt(phys_addr)));

	/* Sync page cache info */
	smp_rmb();

	/* Release additional page references held for recycling */
	head = 0;
	while (head < rbdr->pgcnt) {
		pgcache = &rbdr->pgcache[head];
		if (pgcache->page && page_ref_count(pgcache->page) != 0) {
			if (rbdr->is_xdp) {
				page_ref_sub(pgcache->page,
					     pgcache->ref_count - 1);
			}
			put_page(pgcache->page);
		}
		head++;
	}

	/* Free RBDR ring */
	nicvf_free_q_desc_mem(nic, &rbdr->dmem);
}

/* Refill receive buffer descriptors with new buffers */
static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp)
{
	struct queue_set *qs = nic->qs;
	int rbdr_idx = qs->rbdr_cnt;
	int tail, qcount;
	int refill_rb_cnt;
	struct rbdr *rbdr;
	struct rbdr_entry_t *desc;
	u64 rbuf;
	int new_rb = 0;

refill:
	if (!rbdr_idx)
		return;
	rbdr_idx--;
	rbdr = &qs->rbdr[rbdr_idx];
	/* Check if it's enabled */
	if (!rbdr->enable)
		goto next_rbdr;

	/* Get number of descriptors to be refilled */
	qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx);
	qcount &= 0x7FFFF;
	/* Doorbell can be rung with a max of ring size minus 1 */
	if (qcount >= (qs->rbdr_len - 1))
		goto next_rbdr;

	refill_rb_cnt = qs->rbdr_len - qcount - 1;

	/* Sync page cache info */
	smp_rmb();

	/* Start filling descs from tail */
	tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3;
	while (refill_rb_cnt) {
		tail++;
		tail &= (rbdr->dmem.q_len - 1);

		if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf))
			break;

		desc = GET_RBDR_DESC(rbdr, tail);
		desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1);
		refill_rb_cnt--;
		new_rb++;
	}

	nicvf_get_page(nic);

	/* make sure all memory stores are done before ringing doorbell */
	smp_wmb();

	/* Check if buffer allocation failed */
	if (refill_rb_cnt)
		nic->rb_alloc_fail = true;
	else
		nic->rb_alloc_fail = false;

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
			      rbdr_idx, new_rb);
next_rbdr:
	/* Re-enable RBDR interrupts only if buffer allocation was a success */
	if (!nic->rb_alloc_fail && rbdr->enable &&
	    netif_running(nic->pnicvf->netdev))
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);

	if (rbdr_idx)
		goto refill;
}

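/* The RBDR doorbell is additive: the value written is the number of new
 * descriptors being handed to hardware, not an absolute tail pointer, so
 * 'new_rb' counts exactly the buffers queued in this pass.
 */
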
/* Alloc rcv buffers in non-atomic mode for a better chance of success */
void nicvf_rbdr_work(struct work_struct *work)
{
	struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work);

	nicvf_refill_rbdr(nic, GFP_KERNEL);
	if (nic->rb_alloc_fail)
		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
	else
		nic->rb_work_scheduled = false;
}

/* In softirq context, alloc rcv buffers in atomic mode */
void nicvf_rbdr_task(unsigned long data)
{
	struct nicvf *nic = (struct nicvf *)data;

	nicvf_refill_rbdr(nic, GFP_ATOMIC);
	if (nic->rb_alloc_fail) {
		nic->rb_work_scheduled = true;
		schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10));
	}
}

/* Initialize completion queue */
static int nicvf_init_cmp_queue(struct nicvf *nic,
				struct cmp_queue *cq, int q_len)
{
	int err;

	err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE,
				     NICVF_CQ_BASE_ALIGN_BYTES);
	if (err)
		return err;

	cq->desc = cq->dmem.base;
	cq->thresh = pass1_silicon(nic->pdev) ? 0 : CMP_QUEUE_CQE_THRESH;
	nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1;

	return 0;
}

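/* Conversion note: 0.05 usec is 50 ns, so CMP_QUEUE_TIMER_THRESH appears to
 * be programmed in 50 ns units; e.g. a threshold of 80 would report as
 * (80 * 0.05) - 1 = 3 usecs of coalescing.
 */
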
static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq)
{
	if (!cq)
		return;
	if (!cq->dmem.base)
		return;

	nicvf_free_q_desc_mem(nic, &cq->dmem);
}

/* Initialize transmit queue */
static int nicvf_init_snd_queue(struct nicvf *nic,
				struct snd_queue *sq, int q_len, int qidx)
{
	int err;

	err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE,
				     NICVF_SQ_BASE_ALIGN_BYTES);
	if (err)
		return err;

	sq->desc = sq->dmem.base;
	sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
	if (!sq->skbuff)
		return -ENOMEM;

	sq->head = 0;
	sq->tail = 0;
	sq->thresh = SND_QUEUE_THRESH;

	/* Check if this SQ is a XDP TX queue */
	if (nic->sqs_mode)
		qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS);
	if (qidx < nic->pnicvf->xdp_tx_queues) {
		/* Alloc memory to save page pointers for XDP_TX */
		sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL);
		if (!sq->xdp_page)
			return -ENOMEM;
		sq->xdp_desc_cnt = 0;
		sq->xdp_free_cnt = q_len - 1;
		sq->is_xdp = true;
	} else {
		sq->xdp_page = NULL;
		sq->xdp_desc_cnt = 0;
		sq->xdp_free_cnt = 0;
		sq->is_xdp = false;

		atomic_set(&sq->free_cnt, q_len - 1);

		/* Preallocate memory for TSO segment's header */
		sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev,
						  q_len * TSO_HEADER_SIZE,
						  &sq->tso_hdrs_phys,
						  GFP_KERNEL);
		if (!sq->tso_hdrs)
			return -ENOMEM;
	}

	return 0;
}

void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq,
			      int hdr_sqe, u8 subdesc_cnt)
{
	u8 idx;
	struct sq_gather_subdesc *gather;

	/* Unmap DMA mapped skb data buffers */
	for (idx = 0; idx < subdesc_cnt; idx++) {
		hdr_sqe++;
		hdr_sqe &= (sq->dmem.q_len - 1);
		gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe);
		/* HW will ensure data coherency, CPU sync not required */
		dma_unmap_page_attrs(&nic->pdev->dev, gather->addr,
				     gather->size, DMA_TO_DEVICE,
				     DMA_ATTR_SKIP_CPU_SYNC);
	}
}

static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
{
	struct sk_buff *skb;
	struct page *page;
	struct sq_hdr_subdesc *hdr;
	struct sq_hdr_subdesc *tso_sqe;

	if (!sq)
		return;
	if (!sq->dmem.base)
		return;

	if (sq->tso_hdrs)
		dma_free_coherent(&nic->pdev->dev,
				  sq->dmem.q_len * TSO_HEADER_SIZE,
				  sq->tso_hdrs, sq->tso_hdrs_phys);

	/* Free pending skbs in the queue */
	smp_rmb();
	while (sq->head != sq->tail) {
		skb = (struct sk_buff *)sq->skbuff[sq->head];
		if (!skb || !sq->xdp_page)
			goto next;

		page = (struct page *)sq->xdp_page[sq->head];
		if (!page)
			goto next;
		else
			put_page(page);

		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
		/* Check for dummy descriptor used for HW TSO offload on 88xx */
		if (hdr->dont_send) {
			/* Get actual TSO descriptors and unmap them */
			tso_sqe =
			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
						 tso_sqe->subdesc_cnt);
		} else {
			nicvf_unmap_sndq_buffers(nic, sq, sq->head,
						 hdr->subdesc_cnt);
		}
		if (skb)
			dev_kfree_skb_any(skb);
next:
		sq->head++;
		sq->head &= (sq->dmem.q_len - 1);
	}
	kfree(sq->skbuff);
	kfree(sq->xdp_page);
	nicvf_free_q_desc_mem(nic, &sq->dmem);
}

static void nicvf_reclaim_snd_queue(struct nicvf *nic,
				    struct queue_set *qs, int qidx)
{
	/* Disable send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0);
	/* Check if SQ is stopped */
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01))
		return;
	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);
}

static void nicvf_reclaim_rcv_queue(struct nicvf *nic,
				    struct queue_set *qs, int qidx)
{
	union nic_mbx mbx = {};

	/* Make sure all packets in the pipeline are written back into mem */
	mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC;
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_reclaim_cmp_queue(struct nicvf *nic,
				    struct queue_set *qs, int qidx)
{
	/* Disable timer threshold (doesn't get reset upon CQ reset) */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0);
	/* Disable completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0);
	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);
}

static void nicvf_reclaim_rbdr(struct nicvf *nic,
			       struct rbdr *rbdr, int qidx)
{
	u64 tmp, fifo_state;
	int timeout = 10;

	/* Save head and tail pointers for freeing up buffers */
	rbdr->head = nicvf_queue_reg_read(nic,
					  NIC_QSET_RBDR_0_1_HEAD,
					  qidx) >> 3;
	rbdr->tail = nicvf_queue_reg_read(nic,
					  NIC_QSET_RBDR_0_1_TAIL,
					  qidx) >> 3;

	/* If RBDR FIFO is in 'FAIL' state then do a reset first
	 * before reclaiming.
	 */
	fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx);
	if (((fifo_state >> 62) & 0x03) == 0x3)
		nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
				      qidx, NICVF_RBDR_RESET);

	/* Disable RBDR */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
	while (1) {
		tmp = nicvf_queue_reg_read(nic,
					   NIC_QSET_RBDR_0_1_PREFETCH_STATUS,
					   qidx);
		if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF))
			break;
		usleep_range(1000, 2000);
		timeout--;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "Failed polling on prefetch status\n");
			return;
		}
	}
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
			      qidx, NICVF_RBDR_RESET);

	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02))
		return;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00);
	if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00))
		return;
}

void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features)
{
	u64 rq_cfg;
	int sqs;

	rq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_RQ_GEN_CFG, 0);

	/* Enable first VLAN stripping */
	if (features & NETIF_F_HW_VLAN_CTAG_RX)
		rq_cfg |= (1ULL << 25);
	else
		rq_cfg &= ~(1ULL << 25);
	nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);

	/* Configure Secondary Qsets, if any */
	for (sqs = 0; sqs < nic->sqs_count; sqs++)
		if (nic->snicvf[sqs])
			nicvf_queue_reg_write(nic->snicvf[sqs],
					      NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
}

static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	/* Reset all RQ/SQ and VF stats */
	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
	mbx.reset_stat.rx_stat_mask = 0x3FFF;
	mbx.reset_stat.tx_stat_mask = 0x1F;
	mbx.reset_stat.rq_stat_mask = 0xFFFF;
	mbx.reset_stat.sq_stat_mask = 0xFFFF;
	nicvf_send_msg_to_pf(nic, &mbx);
}

/* Configures receive queue */
static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
				   int qidx, bool enable)
{
	union nic_mbx mbx = {};
	struct rcv_queue *rq;
	struct rq_cfg rq_cfg;

	rq = &qs->rq[qidx];
	rq->enable = enable;

	/* Disable receive queue */
	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0);

	if (!rq->enable) {
		nicvf_reclaim_rcv_queue(nic, qs, qidx);
		return;
	}

	rq->cq_qs = qs->vnic_id;
	rq->cq_idx = qidx;
	rq->start_rbdr_qs = qs->vnic_id;
	rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1;
	rq->cont_rbdr_qs = qs->vnic_id;
	rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1;
	/* all writes of RBDR data to be loaded into L2 Cache as well */
	rq->caching = 1;

	/* Send a mailbox msg to PF to config RQ */
	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
	mbx.rq.qs_num = qs->vnic_id;
	mbx.rq.rq_num = qidx;
	mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) |
		     (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) |
		     (rq->cont_qs_rbdr_idx << 8) |
		     (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx);
	nicvf_send_msg_to_pf(nic, &mbx);

	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
		     (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
		     (qs->vnic_id << 0);
	nicvf_send_msg_to_pf(nic, &mbx);

	/* RQ drop config
	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
	 */
	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
		     (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
		     (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
	nicvf_send_msg_to_pf(nic, &mbx);

	if (!nic->sqs_mode && (qidx == 0)) {
		/* Enable checking L3/L4 length and TCP/UDP checksums.
		 * Also allow IPv6 pkts with zero UDP checksum.
		 */
		nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
				      (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
		nicvf_config_vlan_stripping(nic, nic->netdev->features);
	}

	/* Enable Receive queue */
	memset(&rq_cfg, 0, sizeof(struct rq_cfg));
	rq_cfg.ena = 1;
	rq_cfg.tcp_ena = 0;
	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
}

/* Configures completion queue */
void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
			    int qidx, bool enable)
{
	struct cmp_queue *cq;
	struct cq_cfg cq_cfg;

	cq = &qs->cq[qidx];
	cq->enable = enable;

	if (!cq->enable) {
		nicvf_reclaim_cmp_queue(nic, qs, qidx);
		return;
	}

	/* Reset completion queue */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET);

	spin_lock_init(&cq->lock);
	/* Set completion queue base address */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE,
			      qidx, (u64)(cq->dmem.phys_base));

	/* Enable Completion queue */
	memset(&cq_cfg, 0, sizeof(struct cq_cfg));
	cq_cfg.ena = 1;
	cq_cfg.reset = 0;
	cq_cfg.caching = 0;
	cq_cfg.qsize = ilog2(qs->cq_len >> 10);
	cq_cfg.avg_con = 0;
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
			      qidx, CMP_QUEUE_TIMER_THRESH);
}

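/* 'qsize' encodes the ring length as log2(len / 1024): e.g. a 4K-entry CQ
 * yields ilog2(4096 >> 10) = 2. The SQ configuration below uses the same
 * encoding for its own ring.
 */
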
/* Configures transmit queue */
static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
				   int qidx, bool enable)
{
	union nic_mbx mbx = {};
	struct snd_queue *sq;
	struct sq_cfg sq_cfg;

	sq = &qs->sq[qidx];
	sq->enable = enable;

	if (!sq->enable) {
		nicvf_reclaim_snd_queue(nic, qs, qidx);
		return;
	}

	/* Reset send queue */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET);

	sq->cq_qs = qs->vnic_id;
	sq->cq_idx = qidx;

	/* Send a mailbox msg to PF to config SQ */
	mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG;
	mbx.sq.qs_num = qs->vnic_id;
	mbx.sq.sq_num = qidx;
	mbx.sq.sqs_mode = nic->sqs_mode;
	mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx;
	nicvf_send_msg_to_pf(nic, &mbx);

	/* Set queue base address */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE,
			      qidx, (u64)(sq->dmem.phys_base));

	/* Enable send queue & set queue size */
	memset(&sq_cfg, 0, sizeof(struct sq_cfg));
	sq_cfg.ena = 1;
	sq_cfg.reset = 0;
	sq_cfg.ldwb = 0;
	sq_cfg.qsize = ilog2(qs->sq_len >> 10);
	sq_cfg.tstmp_bgx_intf = 0;
	/* CQ's level at which HW will stop processing SQEs to avoid
	 * transmitting a pkt with no space in CQ to post CQE_TX.
	 */
	sq_cfg.cq_limit = (CMP_QUEUE_PIPELINE_RSVD * 256) / qs->cq_len;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh);

	/* Set queue:cpu affinity for better load distribution */
	if (cpu_online(qidx)) {
		cpumask_set_cpu(qidx, &sq->affinity_mask);
		netif_set_xps_queue(nic->netdev,
				    &sq->affinity_mask, qidx);
	}
}

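/* 'cq_limit' is a fill level in 1/256ths of the CQ size: scaling the
 * reserved-entry count by 256 / cq_len makes hardware pause SQE processing
 * once roughly CMP_QUEUE_PIPELINE_RSVD free CQEs remain, so every accepted
 * packet is guaranteed a slot for its CQE_TX.
 */
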
/* Configures receive buffer descriptor ring */
static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
			      int qidx, bool enable)
{
	struct rbdr *rbdr;
	struct rbdr_cfg rbdr_cfg;

	rbdr = &qs->rbdr[qidx];
	nicvf_reclaim_rbdr(nic, rbdr, qidx);
	if (!enable)
		return;

	/* Set descriptor base address */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE,
			      qidx, (u64)(rbdr->dmem.phys_base));

	/* Enable RBDR & set queue size */
	/* Buffer size should be in multiples of 128 bytes */
	memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
	rbdr_cfg.ena = 1;
	rbdr_cfg.reset = 0;
	rbdr_cfg.ldwb = 0;
	rbdr_cfg.qsize = RBDR_SIZE;
	rbdr_cfg.avg_con = 0;
	rbdr_cfg.lines = rbdr->dma_size / 128;
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG,
			      qidx, *(u64 *)&rbdr_cfg);

	/* Notify HW */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR,
			      qidx, qs->rbdr_len - 1);

	/* Set threshold value for interrupt generation */
	nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH,
			      qidx, rbdr->thresh - 1);
}

/* Requests PF to assign and enable Qset */
void nicvf_qset_config(struct nicvf *nic, bool enable)
{
	union nic_mbx mbx = {};
	struct queue_set *qs = nic->qs;
	struct qs_cfg *qs_cfg;

	if (!qs) {
		netdev_warn(nic->netdev,
			    "Qset is still not allocated, don't init queues\n");
		return;
	}

	qs->enable = enable;
	qs->vnic_id = nic->vf_id;

	/* Send a mailbox msg to PF to config Qset */
	mbx.qs.msg = NIC_MBOX_MSG_QS_CFG;
	mbx.qs.num = qs->vnic_id;
	mbx.qs.sqs_count = nic->sqs_count;

	mbx.qs.cfg = 0;
	qs_cfg = (struct qs_cfg *)&mbx.qs.cfg;
	if (qs->enable) {
		qs_cfg->ena = 1;
		qs_cfg->vnic = qs->vnic_id;
	}
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_free_resources(struct nicvf *nic)
{
	int qidx;
	struct queue_set *qs = nic->qs;

	/* Free receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_free_rbdr(nic, &qs->rbdr[qidx]);

	/* Free completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_free_cmp_queue(nic, &qs->cq[qidx]);

	/* Free send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_free_snd_queue(nic, &qs->sq[qidx]);
}

static int nicvf_alloc_resources(struct nicvf *nic)
{
	int qidx;
	struct queue_set *qs = nic->qs;

	/* Alloc receive buffer descriptor ring */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len,
				    DMA_BUFFER_LEN))
			goto alloc_fail;
	}

	/* Alloc send queue */
	for (qidx = 0; qidx < qs->sq_cnt; qidx++) {
		if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx))
			goto alloc_fail;
	}

	/* Alloc completion queue */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len))
			goto alloc_fail;
	}

	return 0;
alloc_fail:
	nicvf_free_resources(nic);
	return -ENOMEM;
}

int nicvf_set_qset_resources(struct nicvf *nic)
{
	struct queue_set *qs;

	qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL);
	if (!qs)
		return -ENOMEM;
	nic->qs = qs;

	/* Set count of each queue */
	qs->rbdr_cnt = DEFAULT_RBDR_CNT;
	qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
	qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
	qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);

	/* Set queue lengths */
	qs->rbdr_len = RCV_BUF_COUNT;
	qs->sq_len = SND_QUEUE_LEN;
	qs->cq_len = CMP_QUEUE_LEN;

	nic->rx_queues = qs->rq_cnt;
	nic->tx_queues = qs->sq_cnt;
	nic->xdp_tx_queues = 0;

	return 0;
}

int nicvf_config_data_transfer(struct nicvf *nic, bool enable)
{
	bool disable = false;
	struct queue_set *qs = nic->qs;
	struct queue_set *pqs = nic->pnicvf->qs;
	int qidx;

	if (!qs)
		return 0;

	/* Take primary VF's queue lengths.
	 * This is needed to take queue lengths set from ethtool
	 * into consideration.
	 */
	if (nic->sqs_mode && pqs) {
		qs->cq_len = pqs->cq_len;
		qs->sq_len = pqs->sq_len;
	}

	if (enable) {
		if (nicvf_alloc_resources(nic))
			return -ENOMEM;

		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
			nicvf_snd_queue_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
			nicvf_cmp_queue_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
			nicvf_rbdr_config(nic, qs, qidx, enable);
		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
			nicvf_rcv_queue_config(nic, qs, qidx, enable);
	} else {
		for (qidx = 0; qidx < qs->rq_cnt; qidx++)
			nicvf_rcv_queue_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
			nicvf_rbdr_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->sq_cnt; qidx++)
			nicvf_snd_queue_config(nic, qs, qidx, disable);
		for (qidx = 0; qidx < qs->cq_cnt; qidx++)
			nicvf_cmp_queue_config(nic, qs, qidx, disable);

		nicvf_free_resources(nic);
	}

	/* Reset RXQ's stats.
	 * SQ's stats will get reset automatically once SQ is reset.
	 */
	nicvf_reset_rcv_queue_stats(nic);

	return 0;
}

/* Get a free desc from SQ
 * returns descriptor pointer & descriptor number
 */
static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt)
{
	int qentry;

	qentry = sq->tail;
	if (!sq->is_xdp)
		atomic_sub(desc_cnt, &sq->free_cnt);
	else
		sq->xdp_free_cnt -= desc_cnt;
	sq->tail += desc_cnt;
	sq->tail &= (sq->dmem.q_len - 1);

	return qentry;
}

/* Rollback to previous tail pointer when descriptors not used */
static inline void nicvf_rollback_sq_desc(struct snd_queue *sq,
					  int qentry, int desc_cnt)
{
	sq->tail = qentry;
	atomic_add(desc_cnt, &sq->free_cnt);
}

/* Free descriptor back to SQ for future use */
void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt)
{
	if (!sq->is_xdp)
		atomic_add(desc_cnt, &sq->free_cnt);
	else
		sq->xdp_free_cnt += desc_cnt;
	sq->head += desc_cnt;
	sq->head &= (sq->dmem.q_len - 1);
}

static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry)
{
	qentry++;
	qentry &= (sq->dmem.q_len - 1);
	return qentry;
}

void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx)
{
	u64 sq_cfg;

	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
	sq_cfg |= NICVF_SQ_EN;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
	/* Ring doorbell so that H/W restarts processing SQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0);
}

void nicvf_sq_disable(struct nicvf *nic, int qidx)
{
	u64 sq_cfg;

	sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx);
	sq_cfg &= ~NICVF_SQ_EN;
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg);
}

void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq,
			      int qidx)
{
	u64 head, tail;
	struct sk_buff *skb;
	struct nicvf *nic = netdev_priv(netdev);
	struct sq_hdr_subdesc *hdr;

	head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4;
	tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4;
	while (sq->head != head) {
		hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head);
		if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) {
			nicvf_put_sq_desc(sq, 1);
			continue;
		}
		skb = (struct sk_buff *)sq->skbuff[sq->head];
		if (skb)
			dev_kfree_skb_any(skb);
		atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets);
		atomic64_add(hdr->tot_len,
			     (atomic64_t *)&netdev->stats.tx_bytes);
		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
	}
}

/* XDP Transmit APIs */
void nicvf_xdp_sq_doorbell(struct nicvf *nic,
			   struct snd_queue *sq, int sq_num)
{
	if (!sq->xdp_desc_cnt)
		return;

	/* make sure all memory stores are done before ringing doorbell */
	smp_wmb();

	/* Inform HW to xmit all queued XDP frames */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
			      sq_num, sq->xdp_desc_cnt);
	sq->xdp_desc_cnt = 0;
}

static inline void
nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry,
			     int subdesc_cnt, u64 data, int len)
{
	struct sq_hdr_subdesc *hdr;

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
	hdr->subdesc_cnt = subdesc_cnt;
	hdr->tot_len = len;
	hdr->post_cqe = 1;
	sq->xdp_page[qentry] = (u64)virt_to_page((void *)data);
}

int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq,
			    u64 bufaddr, u64 dma_addr, u16 len)
{
	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;
	int qentry;

	if (subdesc_cnt > sq->xdp_free_cnt)
		return 0;

	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);

	nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len);

	qentry = nicvf_get_nxt_sqentry(sq, qentry);
	nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr);

	sq->xdp_desc_cnt += subdesc_cnt;

	return 1;
}

/* Calculate the number of SQ subdescriptors needed to transmit all
 * segments of this TSO packet.
 * Taken from 'Tilera network driver' with a minor modification.
 */
static int nicvf_tso_count_subdescs(struct sk_buff *skb)
{
	struct skb_shared_info *sh = skb_shinfo(skb);
	unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	unsigned int data_len = skb->len - sh_len;
	unsigned int p_len = sh->gso_size;
	long f_id = -1;	/* id of the current fragment */
	long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
	long f_used = 0;  /* bytes used from the current fragment */
	long n;	/* size of the current piece of payload */
	int num_edescs = 0;
	int segment;

	for (segment = 0; segment < sh->gso_segs; segment++) {
		unsigned int p_used = 0;

		/* One edesc for header and for each piece of the payload. */
		for (num_edescs++; p_used < p_len; num_edescs++) {
			/* Advance as needed. */
			while (f_used >= f_size) {
				f_id++;
				f_size = skb_frag_size(&sh->frags[f_id]);
				f_used = 0;
			}

			/* Use bytes from the current fragment. */
			n = p_len - p_used;
			if (n > f_size - f_used)
				n = f_size - f_used;
			f_used += n;
			p_used += n;
		}

		/* The last segment may be less than gso_size. */
		data_len -= p_len;
		if (data_len < p_len)
			p_len = data_len;
	}

	/* '+ gso_segs' for SQ_HDR_SUBDESCs for each segment */
	return num_edescs + sh->gso_segs;
}

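/* Worked example (hypothetical numbers): 2896 bytes of TCP payload with
 * gso_size = 1448 makes two segments; if each segment's payload lives in a
 * single fragment, the loop counts one header-copy edesc plus one payload
 * edesc per segment, and '+ gso_segs' adds one SQ_DESC_TYPE_HEADER subdesc
 * per segment, giving 6 subdescriptors in total.
 */
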
#define POST_CQE_DESC_COUNT 2

/* Get the number of SQ descriptors needed to xmit this skb */
static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
{
	int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT;

	if (skb_shinfo(skb)->gso_size && !nic->hw_tso) {
		subdesc_cnt = nicvf_tso_count_subdescs(skb);
		return subdesc_cnt;
	}

	/* Dummy descriptors to get TSO pkt completion notification */
	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
		subdesc_cnt += POST_CQE_DESC_COUNT;

	if (skb_shinfo(skb)->nr_frags)
		subdesc_cnt += skb_shinfo(skb)->nr_frags;

	return subdesc_cnt;
}

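/* Example costs: a linear non-TSO skb needs MIN_SQ_DESC_PER_PKT_XMIT (one
 * HDR plus one GATHER subdesc); each page fragment adds one more GATHER;
 * on 88xx with HW TSO, POST_CQE_DESC_COUNT extra entries hold the dummy
 * HDR + IMMEDIATE pair used purely for completion notification.
 */
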
/* Add SQ HEADER subdescriptor.
 * First subdescriptor for every send descriptor.
 */
static inline void
nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
			 int subdesc_cnt, struct sk_buff *skb, int len)
{
	int proto;
	struct sq_hdr_subdesc *hdr;
	union {
		struct iphdr *v4;
		struct ipv6hdr *v6;
		unsigned char *hdr;
	} ip;

	ip.hdr = skb_network_header(skb);
	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;

	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
		/* post_cqe = 0, to avoid HW posting a CQE for every TSO
		 * segment transmitted on 88xx.
		 */
		hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
	} else {
		sq->skbuff[qentry] = (u64)skb;
		/* Enable notification via CQE after processing SQE */
		hdr->post_cqe = 1;
		/* No of subdescriptors following this */
		hdr->subdesc_cnt = subdesc_cnt;
	}
	hdr->tot_len = len;

	/* Offload checksum calculation to HW */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (ip.v4->version == 4)
			hdr->csum_l3 = 1; /* Enable IP csum calculation */
		hdr->l3_offset = skb_network_offset(skb);
		hdr->l4_offset = skb_transport_offset(skb);

		proto = (ip.v4->version == 4) ? ip.v4->protocol :
			ip.v6->nexthdr;

		switch (proto) {
		case IPPROTO_TCP:
			hdr->csum_l4 = SEND_L4_CSUM_TCP;
			break;
		case IPPROTO_UDP:
			hdr->csum_l4 = SEND_L4_CSUM_UDP;
			break;
		case IPPROTO_SCTP:
			hdr->csum_l4 = SEND_L4_CSUM_SCTP;
			break;
		}
	}

	if (nic->hw_tso && skb_shinfo(skb)->gso_size) {
		hdr->tso = 1;
		hdr->tso_start = skb_transport_offset(skb) + tcp_hdrlen(skb);
		hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
		/* For non-tunneled pkts, point this to L2 ethertype */
		hdr->inner_l3_offset = skb_network_offset(skb) - 2;
		this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
	}
}

/* SQ GATHER subdescriptor
 * Must follow HDR descriptor
 */
static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry,
					       int size, u64 data)
{
	struct sq_gather_subdesc *gather;

	qentry &= (sq->dmem.q_len - 1);
	gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry);

	memset(gather, 0, SND_QUEUE_DESC_SIZE);
	gather->subdesc_type = SQ_DESC_TYPE_GATHER;
	gather->ld_type = NIC_SEND_LD_TYPE_E_LDD;
	gather->size = size;
	gather->addr = data;
}

/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
 * packet so that a CQE is posted as a notification for transmission of
 * the TSO packet.
 */
static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
					    int tso_sqe, struct sk_buff *skb)
{
	struct sq_imm_subdesc *imm;
	struct sq_hdr_subdesc *hdr;

	sq->skbuff[qentry] = (u64)skb;

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
	/* Enable notification via CQE after processing SQE */
	hdr->post_cqe = 1;
	/* There is no packet to transmit here */
	hdr->dont_send = 1;
	hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
	hdr->tot_len = 1;
	/* Actual TSO header SQE index, needed for cleanup */
	hdr->rsvd2 = tso_sqe;

	qentry = nicvf_get_nxt_sqentry(sq, qentry);
	imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
	memset(imm, 0, SND_QUEUE_DESC_SIZE);
	imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
	imm->len = 1;
}

static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
				     int sq_num, int desc_cnt)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(nic->pnicvf->netdev,
				  skb_get_queue_mapping(skb));

	netdev_tx_sent_queue(txq, skb->len);

	/* make sure all memory stores are done before ringing doorbell */
	smp_wmb();

	/* Inform HW to xmit the queued descriptors */
	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
			      sq_num, desc_cnt);
}

/* Segment a TSO packet into 'gso_size' segments and append
 * them to SQ for transfer
 */
static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
			       int sq_num, int qentry, struct sk_buff *skb)
{
	struct tso_t tso;
	int seg_subdescs = 0, desc_cnt = 0;
	int seg_len, total_len, data_left;
	int hdr_qentry = qentry;
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);

	tso_start(skb, &tso);
	total_len = skb->len - hdr_len;
	while (total_len > 0) {
		char *hdr;

		/* Save Qentry for adding HDR_SUBDESC at the end */
		hdr_qentry = qentry;

		data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
		total_len -= data_left;

		/* Add segment's header */
		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE;
		tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
		nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len,
					    sq->tso_hdrs_phys +
					    qentry * TSO_HEADER_SIZE);
		/* HDR_SUBDESC + GATHER */
		seg_subdescs = 2;
		seg_len = hdr_len;

		/* Add segment's payload fragments */
		while (data_left > 0) {
			int size;

			size = min_t(int, tso.size, data_left);

			qentry = nicvf_get_nxt_sqentry(sq, qentry);
			nicvf_sq_add_gather_subdesc(sq, qentry, size,
						    virt_to_phys(tso.data));
			seg_subdescs++;
			seg_len += size;

			data_left -= size;
			tso_build_data(skb, &tso, size);
		}
		nicvf_sq_add_hdr_subdesc(nic, sq, hdr_qentry,
					 seg_subdescs - 1, skb, seg_len);
		sq->skbuff[hdr_qentry] = (u64)NULL;
		qentry = nicvf_get_nxt_sqentry(sq, qentry);

		desc_cnt += seg_subdescs;
	}
	/* Save SKB in the last segment for freeing */
	sq->skbuff[hdr_qentry] = (u64)skb;

	nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);

	this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
	return 1;
}

/* Append an skb to a SQ for packet transfer. */
int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
			struct sk_buff *skb, u8 sq_num)
{
	int i, size;
	int subdesc_cnt, hdr_sqe = 0;
	int qentry;
	u64 dma_addr;

	subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
	if (subdesc_cnt > atomic_read(&sq->free_cnt))
		goto append_fail;

	qentry = nicvf_get_sq_desc(sq, subdesc_cnt);

	/* Check if it's a TSO packet */
	if (skb_shinfo(skb)->gso_size && !nic->hw_tso)
		return nicvf_sq_append_tso(nic, sq, sq_num, qentry, skb);

	/* Add SQ header subdesc */
	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
				 skb, skb->len);
	hdr_sqe = qentry;

	/* Add SQ gather subdescs */
	qentry = nicvf_get_nxt_sqentry(sq, qentry);
	size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len;
	/* HW will ensure data coherency, CPU sync not required */
	dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data),
				      offset_in_page(skb->data), size,
				      DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
		nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
		return 0;
	}

	nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);

	/* Check for scattered buffer */
	if (!skb_is_nonlinear(skb))
		goto doorbell;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const struct skb_frag_struct *frag;

		frag = &skb_shinfo(skb)->frags[i];

		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		size = skb_frag_size(frag);
		dma_addr = dma_map_page_attrs(&nic->pdev->dev,
					      skb_frag_page(frag),
					      frag->page_offset, size,
					      DMA_TO_DEVICE,
					      DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(&nic->pdev->dev, dma_addr)) {
			/* Free entire chain of mapped buffers
			 * here 'i' = frags mapped + above mapped skb->data
			 */
			nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i);
			nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt);
			return 0;
		}
		nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr);
	}

doorbell:
	if (nic->t88 && skb_shinfo(skb)->gso_size) {
		qentry = nicvf_get_nxt_sqentry(sq, qentry);
		nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb);
	}

	nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);

	return 1;

append_fail:
	/* Use original PCI dev for debug log */
	nic = nic->pnicvf;
	netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n");
	return 0;
}

static inline unsigned frag_num(unsigned i)
{
#ifdef __BIG_ENDIAN
	return (i & ~3) + 3 - (i & 3);
#else
	return i;
#endif
}

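/* On big-endian kernels the four u16 lengths packed into each 64-bit CQE
 * word appear in reverse order, so frag_num() swaps indices within each
 * group of four: 0->3, 1->2, 2->1, 3->0, 4->7, ... On little-endian it is
 * the identity map.
 */
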
static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr,
				   u64 buf_addr, bool xdp)
{
	struct page *page = NULL;
	int len = RCV_FRAG_LEN;

	if (xdp) {
		page = virt_to_page(phys_to_virt(buf_addr));
		/* Check if it's a recycled page; if not,
		 * unmap the DMA mapping.
		 *
		 * A recycled page holds an extra reference.
		 */
		if (page_ref_count(page) != 1)
			return;

		len += XDP_PACKET_HEADROOM;
		/* Receive buffers in XDP mode are mapped from page start */
		dma_addr &= PAGE_MASK;
	}
	dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len,
			     DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
}

/* Returns SKB for a received packet */
struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic,
				  struct cqe_rx_t *cqe_rx, bool xdp)
{
	int frag;
	int payload_len = 0;
	struct sk_buff *skb = NULL;
	struct page *page;
	int offset;
	u16 *rb_lens = NULL;
	u64 *rb_ptrs = NULL;
	u64 phys_addr;

	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
	/* Except on 88xx pass1, on all other chips CQE_RX2_S is added to
	 * CQE_RX at word6, hence buffer pointers move by a word.
	 *
	 * Use the existing 'hw_tso' flag, which will be set for all chips
	 * except 88xx pass1, instead of an additional cache line
	 * access (or miss) by using the pci dev's revision.
	 */
	if (!nic->hw_tso)
		rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
	else
		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));

	for (frag = 0; frag < cqe_rx->rb_cnt; frag++) {
		payload_len = rb_lens[frag_num(frag)];
		phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs);
		if (!phys_addr) {
			if (skb)
				dev_kfree_skb_any(skb);
			return NULL;
		}

		if (!frag) {
			/* First fragment */
			nicvf_unmap_rcv_buffer(nic,
					       *rb_ptrs - cqe_rx->align_pad,
					       phys_addr, xdp);
			skb = nicvf_rb_ptr_to_skb(nic,
						  phys_addr - cqe_rx->align_pad,
						  payload_len);
			if (!skb)
				return NULL;
			skb_reserve(skb, cqe_rx->align_pad);
			skb_put(skb, payload_len);
		} else {
			/* Add fragments */
			nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp);
			page = virt_to_page(phys_to_virt(phys_addr));
			offset = phys_to_virt(phys_addr) - page_address(page);
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
					offset, payload_len, RCV_FRAG_LEN);
		}
		/* Next buffer pointer */
		rb_ptrs++;
	}
	return skb;
}

static u64 nicvf_int_type_to_mask(int int_type, int q_idx)
{
	u64 reg_val;

	switch (int_type) {
	case NICVF_INTR_CQ:
		reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT);
		break;
	case NICVF_INTR_SQ:
		reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT);
		break;
	case NICVF_INTR_RBDR:
		reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT);
		break;
	case NICVF_INTR_PKT_DROP:
		reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT);
		break;
	case NICVF_INTR_TCP_TIMER:
		reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT);
		break;
	case NICVF_INTR_MBOX:
		reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT);
		break;
	case NICVF_INTR_QS_ERR:
		reg_val = (1ULL << NICVF_INTR_QS_ERR_SHIFT);
		break;
	default:
		reg_val = 0;
	}

	return reg_val;
}

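/* Per-queue sources (CQ, SQ, RBDR) get one bit per queue index within their
 * field, e.g. CQ 3 maps to (1ULL << 3) << NICVF_INTR_CQ_SHIFT; global
 * sources (mailbox, Qset error, ...) are single fixed bits. A zero return
 * doubles as the "unknown interrupt type" indication for the callers below.
 */
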
/* Enable interrupt */
void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx)
{
	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);

	if (!mask) {
		netdev_dbg(nic->netdev,
			   "Failed to enable interrupt: unknown type\n");
		return;
	}
	nicvf_reg_write(nic, NIC_VF_ENA_W1S,
			nicvf_reg_read(nic, NIC_VF_ENA_W1S) | mask);
}

/* Disable interrupt */
void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx)
{
	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);

	if (!mask) {
		netdev_dbg(nic->netdev,
			   "Failed to disable interrupt: unknown type\n");
		return;
	}

	nicvf_reg_write(nic, NIC_VF_ENA_W1C, mask);
}

/* Clear interrupt */
void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx)
{
	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);

	if (!mask) {
		netdev_dbg(nic->netdev,
			   "Failed to clear interrupt: unknown type\n");
		return;
	}

	nicvf_reg_write(nic, NIC_VF_INT, mask);
}

/* Check if interrupt is enabled */
int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx)
{
	u64 mask = nicvf_int_type_to_mask(int_type, q_idx);
	/* If interrupt type is unknown, we treat it as disabled. */
	if (!mask) {
		netdev_dbg(nic->netdev,
			   "Failed to check interrupt enable: unknown type\n");
		return 0;
	}

	return mask & nicvf_reg_read(nic, NIC_VF_ENA_W1S);
}

void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx)
{
	struct rcv_queue *rq;

#define GET_RQ_STATS(reg) \
	nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
			    (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))

	rq = &nic->qs->rq[rq_idx];
	rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS);
	rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS);
}

void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
{
	struct snd_queue *sq;

#define GET_SQ_STATS(reg) \
	nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
			    (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))

	sq = &nic->qs->sq[sq_idx];
	sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS);
	sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS);
}

/* Check for errors in the receive cmp.queue entry */
int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
{
	netif_err(nic, rx_err, nic->netdev,
		  "RX error CQE err_level 0x%x err_opcode 0x%x\n",
		  cqe_rx->err_level, cqe_rx->err_opcode);

	switch (cqe_rx->err_opcode) {
	case CQ_RX_ERROP_RE_PARTIAL:
		this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
		break;
	case CQ_RX_ERROP_RE_JABBER:
		this_cpu_inc(nic->drv_stats->rx_jabber_errs);
		break;
	case CQ_RX_ERROP_RE_FCS:
		this_cpu_inc(nic->drv_stats->rx_fcs_errs);
		break;
	case CQ_RX_ERROP_RE_RX_CTL:
		this_cpu_inc(nic->drv_stats->rx_bgx_errs);
		break;
	case CQ_RX_ERROP_PREL2_ERR:
		this_cpu_inc(nic->drv_stats->rx_prel2_errs);
		break;
	case CQ_RX_ERROP_L2_MAL:
		this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
		break;
	case CQ_RX_ERROP_L2_OVERSIZE:
		this_cpu_inc(nic->drv_stats->rx_oversize);
		break;
	case CQ_RX_ERROP_L2_UNDERSIZE:
		this_cpu_inc(nic->drv_stats->rx_undersize);
		break;
	case CQ_RX_ERROP_L2_LENMISM:
		this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
		break;
	case CQ_RX_ERROP_L2_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l2_pclp);
		break;
	case CQ_RX_ERROP_IP_NOT:
		this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
		break;
	case CQ_RX_ERROP_IP_CSUM_ERR:
		this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
		break;
	case CQ_RX_ERROP_IP_MAL:
		this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
		break;
	case CQ_RX_ERROP_IP_MALD:
		this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
		break;
	case CQ_RX_ERROP_IP_HOP:
		this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
		break;
	case CQ_RX_ERROP_L3_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l3_pclp);
		break;
	case CQ_RX_ERROP_L4_MAL:
		this_cpu_inc(nic->drv_stats->rx_l4_malformed);
		break;
	case CQ_RX_ERROP_L4_CHK:
		this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
		break;
	case CQ_RX_ERROP_UDP_LEN:
		this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
		break;
	case CQ_RX_ERROP_L4_PORT:
		this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
		break;
	case CQ_RX_ERROP_TCP_FLAG:
		this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
		break;
	case CQ_RX_ERROP_TCP_OFFSET:
		this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
		break;
	case CQ_RX_ERROP_L4_PCLP:
		this_cpu_inc(nic->drv_stats->rx_l4_pclp);
		break;
	case CQ_RX_ERROP_RBDR_TRUNC:
		this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
		break;
	}

	return 1;
}

/* Check for errors in the send cmp.queue entry */
int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
{
	switch (cqe_tx->send_status) {
	case CQ_TX_ERROP_DESC_FAULT:
		this_cpu_inc(nic->drv_stats->tx_desc_fault);
		break;
	case CQ_TX_ERROP_HDR_CONS_ERR:
		this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
		break;
	case CQ_TX_ERROP_SUBDC_ERR:
		this_cpu_inc(nic->drv_stats->tx_subdesc_err);
		break;
	case CQ_TX_ERROP_MAX_SIZE_VIOL:
		this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
		break;
	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
		break;
	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_data_seq_err);
		break;
	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
		this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
		break;
	case CQ_TX_ERROP_LOCK_VIOL:
		this_cpu_inc(nic->drv_stats->tx_lock_viol);
		break;
	case CQ_TX_ERROP_DATA_FAULT:
		this_cpu_inc(nic->drv_stats->tx_data_fault);
		break;
	case CQ_TX_ERROP_TSTMP_CONFLICT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
		break;
	case CQ_TX_ERROP_TSTMP_TIMEOUT:
		this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
		break;
	case CQ_TX_ERROP_MEM_FAULT:
		this_cpu_inc(nic->drv_stats->tx_mem_fault);
		break;
	case CQ_TX_ERROP_CK_OVERLAP:
		this_cpu_inc(nic->drv_stats->tx_csum_overlap);
		break;
	case CQ_TX_ERROP_CK_OFLOW:
		this_cpu_inc(nic->drv_stats->tx_csum_overflow);
		break;
	}

	return 1;
}