1 /* QLogic qedr NIC Driver
2 * Copyright (c) 2015-2016 QLogic Corporation
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
37 #include <linux/iommu.h>
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
51 #include <rdma/qedr-abi.h>
54 #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
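/* Note: DB_ADDR_SHIFT turns a DQ PWM doorbell offset constant into the byte
 * offset that is added to the doorbell BAR base (see the cq->db_addr and
 * qp->sq.db computations below).
 */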
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
59 size_t min_len = min_t(size_t, len, udata->outlen);
61 return ib_copy_to_udata(udata, src, min_len);
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
66 if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
69 *pkey = QEDR_ROCE_PKEY_DEFAULT;
73 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
76 struct qedr_dev *dev = get_qedr_dev(ibdev);
79 if (!rdma_cap_roce_gid_table(ibdev, port))
82 rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
84 memcpy(sgid, &zgid, sizeof(*sgid));
88 DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
89 sgid->global.interface_id, sgid->global.subnet_prefix);
94 int qedr_add_gid(struct ib_device *device, u8 port_num,
95 unsigned int index, const union ib_gid *gid,
96 const struct ib_gid_attr *attr, void **context)
98 if (!rdma_cap_roce_gid_table(device, port_num))
101 if (port_num > QEDR_MAX_PORT)
110 int qedr_del_gid(struct ib_device *device, u8 port_num,
111 unsigned int index, void **context)
113 if (!rdma_cap_roce_gid_table(device, port_num))
116 if (port_num > QEDR_MAX_PORT)
125 int qedr_query_device(struct ib_device *ibdev,
126 struct ib_device_attr *attr, struct ib_udata *udata)
128 struct qedr_dev *dev = get_qedr_dev(ibdev);
129 struct qedr_device_attr *qattr = &dev->attr;
131 if (!dev->rdma_ctx) {
133 "qedr_query_device called with invalid params rdma_ctx=%p\n",
138 memset(attr, 0, sizeof(*attr));
140 attr->fw_ver = qattr->fw_ver;
141 attr->sys_image_guid = qattr->sys_image_guid;
142 attr->max_mr_size = qattr->max_mr_size;
143 attr->page_size_cap = qattr->page_size_caps;
144 attr->vendor_id = qattr->vendor_id;
145 attr->vendor_part_id = qattr->vendor_part_id;
146 attr->hw_ver = qattr->hw_ver;
147 attr->max_qp = qattr->max_qp;
148 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
149 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
150 IB_DEVICE_RC_RNR_NAK_GEN |
151 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
153 attr->max_sge = qattr->max_sge;
154 attr->max_sge_rd = qattr->max_sge;
155 attr->max_cq = qattr->max_cq;
156 attr->max_cqe = qattr->max_cqe;
157 attr->max_mr = qattr->max_mr;
158 attr->max_mw = qattr->max_mw;
159 attr->max_pd = qattr->max_pd;
160 attr->atomic_cap = dev->atomic_cap;
161 attr->max_fmr = qattr->max_fmr;
162 attr->max_map_per_fmr = 16;
163 attr->max_qp_init_rd_atom =
164 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
165 attr->max_qp_rd_atom =
166 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
167 attr->max_qp_init_rd_atom);
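/* The fls()-based expressions above round the reported read/atomic resource
 * limits down to the nearest power of two, e.g. a HW limit of 24 would be
 * exposed as 16.
 */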
169 attr->max_srq = qattr->max_srq;
170 attr->max_srq_sge = qattr->max_srq_sge;
171 attr->max_srq_wr = qattr->max_srq_wr;
173 attr->local_ca_ack_delay = qattr->dev_ack_delay;
174 attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
175 attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
176 attr->max_ah = qattr->max_ah;
181 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
186 *ib_speed = IB_SPEED_SDR;
187 *ib_width = IB_WIDTH_1X;
190 *ib_speed = IB_SPEED_QDR;
191 *ib_width = IB_WIDTH_1X;
195 *ib_speed = IB_SPEED_DDR;
196 *ib_width = IB_WIDTH_4X;
200 *ib_speed = IB_SPEED_EDR;
201 *ib_width = IB_WIDTH_1X;
205 *ib_speed = IB_SPEED_QDR;
206 *ib_width = IB_WIDTH_4X;
210 *ib_speed = IB_SPEED_HDR;
211 *ib_width = IB_WIDTH_1X;
215 *ib_speed = IB_SPEED_EDR;
216 *ib_width = IB_WIDTH_4X;
221 *ib_speed = IB_SPEED_SDR;
222 *ib_width = IB_WIDTH_1X;
226 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
228 struct qedr_dev *dev;
229 struct qed_rdma_port *rdma_port;
231 dev = get_qedr_dev(ibdev);
233 DP_ERR(dev, "invalid_port=0x%x\n", port);
237 if (!dev->rdma_ctx) {
238 DP_ERR(dev, "rdma_ctx is NULL\n");
242 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
244	/* *attr is zeroed by the caller, so avoid zeroing it here */
245 if (rdma_port->port_state == QED_RDMA_PORT_UP) {
246 attr->state = IB_PORT_ACTIVE;
247 attr->phys_state = 5;
249 attr->state = IB_PORT_DOWN;
250 attr->phys_state = 3;
252 attr->max_mtu = IB_MTU_4096;
253 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
258 attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
259 attr->gid_tbl_len = QEDR_MAX_SGID;
260 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
261 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
262 attr->qkey_viol_cntr = 0;
263 get_link_speed_and_width(rdma_port->link_speed,
264 &attr->active_speed, &attr->active_width);
265 attr->max_msg_sz = rdma_port->max_msg_size;
266 attr->max_vl_num = 4;
271 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
272 struct ib_port_modify *props)
274 struct qedr_dev *dev;
276 dev = get_qedr_dev(ibdev);
278 DP_ERR(dev, "invalid_port=0x%x\n", port);
285 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
290 mm = kzalloc(sizeof(*mm), GFP_KERNEL);
294 mm->key.phy_addr = phy_addr;
295 /* This function might be called with a length which is not a multiple
296 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
297 * forces this granularity by increasing the requested size if needed.
298 * When qedr_mmap is called, it will search the list with the updated
299 * length as a key. To prevent search failures, the length is rounded up
300 * in advance to PAGE_SIZE.
302 mm->key.len = roundup(len, PAGE_SIZE);
303 INIT_LIST_HEAD(&mm->entry);
305 mutex_lock(&uctx->mm_list_lock);
306 list_add(&mm->entry, &uctx->mm_head);
307 mutex_unlock(&uctx->mm_list_lock);
309 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
310 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
311 (unsigned long long)mm->key.phy_addr,
312 (unsigned long)mm->key.len, uctx);
317 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
323 mutex_lock(&uctx->mm_list_lock);
324 list_for_each_entry(mm, &uctx->mm_head, entry) {
325 if (len != mm->key.len || phy_addr != mm->key.phy_addr)
331 mutex_unlock(&uctx->mm_list_lock);
332 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
333 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
334 mm->key.phy_addr, mm->key.len, uctx, found);
339 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
340 struct ib_udata *udata)
343 struct qedr_ucontext *ctx;
344 struct qedr_alloc_ucontext_resp uresp;
345 struct qedr_dev *dev = get_qedr_dev(ibdev);
346 struct qed_rdma_add_user_out_params oparams;
349 return ERR_PTR(-EFAULT);
351 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
353 return ERR_PTR(-ENOMEM);
355 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
358 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
363 ctx->dpi = oparams.dpi;
364 ctx->dpi_addr = oparams.dpi_addr;
365 ctx->dpi_phys_addr = oparams.dpi_phys_addr;
366 ctx->dpi_size = oparams.dpi_size;
367 INIT_LIST_HEAD(&ctx->mm_head);
368 mutex_init(&ctx->mm_list_lock);
370 memset(&uresp, 0, sizeof(uresp));
372 uresp.dpm_enabled = dev->user_dpm_enabled;
373 uresp.wids_enabled = 1;
374 uresp.wid_count = oparams.wid_count;
375 uresp.db_pa = ctx->dpi_phys_addr;
376 uresp.db_size = ctx->dpi_size;
377 uresp.max_send_wr = dev->attr.max_sqe;
378 uresp.max_recv_wr = dev->attr.max_rqe;
379 uresp.max_srq_wr = dev->attr.max_srq_wr;
380 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
381 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
382 uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
383 uresp.max_cqes = QEDR_MAX_CQES;
385 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
391 rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
395 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
397 return &ctx->ibucontext;
404 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
406 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
407 struct qedr_mm *mm, *tmp;
410 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
412 uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
414 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
415 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
416 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
417 mm->key.phy_addr, mm->key.len, uctx);
418 list_del(&mm->entry);
426 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
428 struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
429 struct qedr_dev *dev = get_qedr_dev(context->device);
430 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
431 u64 unmapped_db = dev->db_phys_addr;
432 unsigned long len = (vma->vm_end - vma->vm_start);
436 DP_DEBUG(dev, QEDR_MSG_INIT,
437 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
438 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
439 if (vma->vm_start & (PAGE_SIZE - 1)) {
440 DP_ERR(dev, "Vma_start not page aligned = %ld\n",
445 found = qedr_search_mmap(ucontext, vm_page, len);
447 DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
452 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
454 if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
456 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
457 if (vma->vm_flags & VM_READ) {
458 DP_ERR(dev, "Trying to map doorbell bar for read\n");
462 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
464 rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
465 PAGE_SIZE, vma->vm_page_prot);
467 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
468 rc = remap_pfn_range(vma, vma->vm_start,
469 vma->vm_pgoff, len, vma->vm_page_prot);
471 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
475 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
476 struct ib_ucontext *context, struct ib_udata *udata)
478 struct qedr_dev *dev = get_qedr_dev(ibdev);
483 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
484 (udata && context) ? "User Lib" : "Kernel");
486 if (!dev->rdma_ctx) {
487 DP_ERR(dev, "invalid RDMA context\n");
488 return ERR_PTR(-EINVAL);
491 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
493 return ERR_PTR(-ENOMEM);
495 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
501 if (udata && context) {
502 struct qedr_alloc_pd_uresp uresp;
506 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
508 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
509 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
513 pd->uctx = get_qedr_ucontext(context);
524 int qedr_dealloc_pd(struct ib_pd *ibpd)
526 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
527 struct qedr_pd *pd = get_qedr_pd(ibpd);
530 pr_err("Invalid PD received in dealloc_pd\n");
534 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
535 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
542 static void qedr_free_pbl(struct qedr_dev *dev,
543 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
545 struct pci_dev *pdev = dev->pdev;
548 for (i = 0; i < pbl_info->num_pbls; i++) {
551 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
552 pbl[i].va, pbl[i].pa);
558 #define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
559 #define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
561 #define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
562 #define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
563 #define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
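/* For example, with the maximum 64KB PBL page size a single PBL page holds
 * 64KB / 8 = 8192 PBEs, so a two-layer PBL can address up to
 * 8192 * 8192 (~67 million) pages.
 */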
565 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
566 struct qedr_pbl_info *pbl_info,
569 struct pci_dev *pdev = dev->pdev;
570 struct qedr_pbl *pbl_table;
571 dma_addr_t *pbl_main_tbl;
576 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
578 return ERR_PTR(-ENOMEM);
580 for (i = 0; i < pbl_info->num_pbls; i++) {
581 va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
586 memset(va, 0, pbl_info->pbl_size);
587 pbl_table[i].va = va;
588 pbl_table[i].pa = pa;
591 /* Two-layer PBLs: if we have more than one pbl we need to initialize
592 * the first one with physical pointers to all of the rest
594 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
595 for (i = 0; i < pbl_info->num_pbls - 1; i++)
596 pbl_main_tbl[i] = pbl_table[i + 1].pa;
601 for (i--; i >= 0; i--)
602 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
603 pbl_table[i].va, pbl_table[i].pa);
605 qedr_free_pbl(dev, pbl_info, pbl_table);
607 return ERR_PTR(-ENOMEM);
610 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
611 struct qedr_pbl_info *pbl_info,
612 u32 num_pbes, int two_layer_capable)
618 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
619 if (num_pbes > MAX_PBES_TWO_LAYER) {
620 DP_ERR(dev, "prepare pbl table: too many pages %d\n",
625 /* calculate required pbl page size */
626 pbl_size = MIN_FW_PBL_PAGE_SIZE;
627 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
628 NUM_PBES_ON_PAGE(pbl_size);
630 while (pbl_capacity < num_pbes) {
632 pbl_capacity = pbl_size / sizeof(u64);
633 pbl_capacity = pbl_capacity * pbl_capacity;
636 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
637 num_pbls++; /* One for layer 0 (points to the pbls) */
638 pbl_info->two_layered = true;
640 /* One layered PBL */
642 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
643 roundup_pow_of_two((num_pbes * sizeof(u64))));
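/* e.g. num_pbes = 1000 needs 8000 bytes of PBEs, which rounds up to a single
 * 8KB PBL page (and never below MIN_FW_PBL_PAGE_SIZE).
 */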
644 pbl_info->two_layered = false;
647 pbl_info->num_pbls = num_pbls;
648 pbl_info->pbl_size = pbl_size;
649 pbl_info->num_pbes = num_pbes;
651 DP_DEBUG(dev, QEDR_MSG_MR,
652 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
653 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
658 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
659 struct qedr_pbl *pbl,
660 struct qedr_pbl_info *pbl_info, u32 pg_shift)
662 int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
663 u32 fw_pg_cnt, fw_pg_per_umem_pg;
664 struct qedr_pbl *pbl_tbl;
665 struct scatterlist *sg;
670 if (!pbl_info->num_pbes)
673 /* If we have a two-layered pbl, the first pbl points to the rest
674 * of the pbls and the first entry lies in the second pbl in the table
676 if (pbl_info->two_layered)
681 pbe = (struct regpair *)pbl_tbl->va;
683 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
689 shift = umem->page_shift;
691 fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
693 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
694 pages = sg_dma_len(sg) >> shift;
695 pg_addr = sg_dma_address(sg);
696 for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
697 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
698 pbe->lo = cpu_to_le32(pg_addr);
699 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
701 pg_addr += BIT(pg_shift);
706 if (total_num_pbes == pbl_info->num_pbes)
709 /* If the given pbl is full storing the pbes,
713 (pbl_info->pbl_size / sizeof(u64))) {
715 pbe = (struct regpair *)pbl_tbl->va;
725 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
726 struct qedr_cq *cq, struct ib_udata *udata)
728 struct qedr_create_cq_uresp uresp;
731 memset(&uresp, 0, sizeof(uresp));
733 uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
734 uresp.icid = cq->icid;
736 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
738 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
743 static void consume_cqe(struct qedr_cq *cq)
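/* The expected toggle bit flips every time the consumer wraps past the last
 * CQE in the chain, so stale entries from the previous pass are not mistaken
 * for newly produced ones.
 */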
745 if (cq->latest_cqe == cq->toggle_cqe)
746 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
748 cq->latest_cqe = qed_chain_consume(&cq->pbl);
751 static inline int qedr_align_cq_entries(int entries)
753 u64 size, aligned_size;
755 /* We allocate an extra entry that we don't report to the FW. */
756 size = (entries + 1) * QEDR_CQE_SIZE;
757 aligned_size = ALIGN(size, PAGE_SIZE);
759 return aligned_size / QEDR_CQE_SIZE;
762 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
763 struct qedr_dev *dev,
764 struct qedr_userq *q,
765 u64 buf_addr, size_t buf_len,
766 int access, int dmasync)
771 q->buf_addr = buf_addr;
772 q->buf_len = buf_len;
773 q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
774 if (IS_ERR(q->umem)) {
775 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
777 return PTR_ERR(q->umem);
780 fw_pages = ib_umem_page_count(q->umem) <<
781 (q->umem->page_shift - FW_PAGE_SHIFT);
783 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
787 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
788 if (IS_ERR(q->pbl_tbl)) {
789 rc = PTR_ERR(q->pbl_tbl);
793 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
799 ib_umem_release(q->umem);
804 static inline void qedr_init_cq_params(struct qedr_cq *cq,
805 struct qedr_ucontext *ctx,
806 struct qedr_dev *dev, int vector,
807 int chain_entries, int page_cnt,
809 struct qed_rdma_create_cq_in_params
812 memset(params, 0, sizeof(*params));
813 params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
814 params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
815 params->cnq_id = vector;
816 params->cq_size = chain_entries - 1;
817 params->dpi = (ctx) ? ctx->dpi : dev->dpi;
818 params->pbl_num_pages = page_cnt;
819 params->pbl_ptr = pbl_ptr;
820 params->pbl_two_level = 0;
823 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
825 /* Flush data before signalling doorbell */
827 cq->db.data.agg_flags = flags;
828 cq->db.data.value = cpu_to_le32(cons);
829 writeq(cq->db.raw, cq->db_addr);
831 /* Make sure write would stick */
835 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
837 struct qedr_cq *cq = get_qedr_cq(ibcq);
838 unsigned long sflags;
839 struct qedr_dev *dev;
841 dev = get_qedr_dev(ibcq->device);
845 "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
851 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
854 spin_lock_irqsave(&cq->cq_lock, sflags);
858 if (flags & IB_CQ_SOLICITED)
859 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
861 if (flags & IB_CQ_NEXT_COMP)
862 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
864 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
866 spin_unlock_irqrestore(&cq->cq_lock, sflags);
871 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
872 const struct ib_cq_init_attr *attr,
873 struct ib_ucontext *ib_ctx, struct ib_udata *udata)
875 struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
876 struct qed_rdma_destroy_cq_out_params destroy_oparams;
877 struct qed_rdma_destroy_cq_in_params destroy_iparams;
878 struct qedr_dev *dev = get_qedr_dev(ibdev);
879 struct qed_rdma_create_cq_in_params params;
880 struct qedr_create_cq_ureq ureq;
881 int vector = attr->comp_vector;
882 int entries = attr->cqe;
890 DP_DEBUG(dev, QEDR_MSG_INIT,
891 "create_cq: called from %s. entries=%d, vector=%d\n",
892 udata ? "User Lib" : "Kernel", entries, vector);
894 if (entries > QEDR_MAX_CQES) {
896 "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
897 entries, QEDR_MAX_CQES);
898 return ERR_PTR(-EINVAL);
901 chain_entries = qedr_align_cq_entries(entries);
902 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
904 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
906 return ERR_PTR(-ENOMEM);
909 memset(&ureq, 0, sizeof(ureq));
910 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
912 "create cq: problem copying data from user space\n");
918 "create cq: cannot create a cq with 0 entries\n");
922 cq->cq_type = QEDR_CQ_TYPE_USER;
924 rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
925 ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
929 pbl_ptr = cq->q.pbl_tbl->pa;
930 page_cnt = cq->q.pbl_info.num_pbes;
932 cq->ibcq.cqe = chain_entries;
934 cq->cq_type = QEDR_CQ_TYPE_KERNEL;
936 rc = dev->ops->common->chain_alloc(dev->cdev,
937 QED_CHAIN_USE_TO_CONSUME,
939 QED_CHAIN_CNT_TYPE_U32,
941 sizeof(union rdma_cqe),
946 page_cnt = qed_chain_get_page_cnt(&cq->pbl);
947 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
948 cq->ibcq.cqe = cq->pbl.capacity;
951 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
954 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
959 cq->sig = QEDR_CQ_MAGIC_NUMBER;
960 spin_lock_init(&cq->cq_lock);
963 rc = qedr_copy_cq_uresp(dev, cq, udata);
967 /* Generate doorbell address. */
968 cq->db_addr = dev->db_addr +
969 DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
970 cq->db.data.icid = cq->icid;
971 cq->db.data.params = DB_AGG_CMD_SET <<
972 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
974 /* Point to the very last element; once we pass it we will toggle */
975 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
976 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
977 cq->latest_cqe = NULL;
979 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
982 DP_DEBUG(dev, QEDR_MSG_CQ,
983 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
984 cq->icid, cq, params.cq_size);
989 destroy_iparams.icid = cq->icid;
990 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
994 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
996 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
999 ib_umem_release(cq->q.umem);
1002 return ERR_PTR(-EINVAL);
1005 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1007 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1008 struct qedr_cq *cq = get_qedr_cq(ibcq);
1010 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1015 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
1016 #define QEDR_DESTROY_CQ_ITER_DURATION (10)
1018 int qedr_destroy_cq(struct ib_cq *ibcq)
1020 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1021 struct qed_rdma_destroy_cq_out_params oparams;
1022 struct qed_rdma_destroy_cq_in_params iparams;
1023 struct qedr_cq *cq = get_qedr_cq(ibcq);
1027 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1031 /* GSI CQs are handled by the driver, so they don't exist in the FW */
1032 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1035 iparams.icid = cq->icid;
1036 rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1040 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1042 if (ibcq->uobject && ibcq->uobject->context) {
1043 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1044 ib_umem_release(cq->q.umem);
1047 /* We don't want the IRQ handler to handle a non-existing CQ so we
1048 * wait until all CNQ interrupts, if any, are received. This will always
1049 * happen and will always happen very fast. If not, then a serious error
1050 * has occurred. That is why we can use a long delay.
1051 * We spin for a short time so we don't lose time on context switching
1052 * in case all the completions are handled in that span. Otherwise
1053 * we sleep for a while and check again. Since the CNQ may be
1054 * associated with (only) the current CPU we use msleep to allow the
1055 * current CPU to be freed.
1056 * The CNQ notification is increased in qedr_irq_handler().
1058 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1059 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1060 udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1064 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1065 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1066 msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1070 if (oparams.num_cq_notif != cq->cnq_notif)
1073 /* Note that we don't need to have explicit code to wait for the
1074 * completion of the event handler because it is invoked from the EQ.
1075 * Since the destroy CQ ramrod has also been received on the EQ we can
1076 * be certain that there's no event handler in process.
1087 "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1088 cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1093 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1094 struct ib_qp_attr *attr,
1096 struct qed_rdma_modify_qp_in_params
1099 enum rdma_network_type nw_type;
1100 struct ib_gid_attr gid_attr;
1101 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1107 rc = ib_get_cached_gid(ibqp->device,
1108 rdma_ah_get_port_num(&attr->ah_attr),
1109 grh->sgid_index, &gid, &gid_attr);
1113 if (!memcmp(&gid, &zgid, sizeof(gid)))
1116 if (gid_attr.ndev) {
1117 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1119 dev_put(gid_attr.ndev);
1120 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1122 case RDMA_NETWORK_IPV6:
1123 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1124 sizeof(qp_params->sgid));
1125 memcpy(&qp_params->dgid.bytes[0],
1127 sizeof(qp_params->dgid));
1128 qp_params->roce_mode = ROCE_V2_IPV6;
1129 SET_FIELD(qp_params->modify_flags,
1130 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1132 case RDMA_NETWORK_IB:
1133 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1134 sizeof(qp_params->sgid));
1135 memcpy(&qp_params->dgid.bytes[0],
1137 sizeof(qp_params->dgid));
1138 qp_params->roce_mode = ROCE_V1;
1140 case RDMA_NETWORK_IPV4:
1141 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1142 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1143 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1144 qp_params->sgid.ipv4_addr = ipv4_addr;
1146 qedr_get_ipv4_from_gid(grh->dgid.raw);
1147 qp_params->dgid.ipv4_addr = ipv4_addr;
1148 SET_FIELD(qp_params->modify_flags,
1149 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1150 qp_params->roce_mode = ROCE_V2_IPV4;
1155 for (i = 0; i < 4; i++) {
1156 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1157 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1160 if (qp_params->vlan_id >= VLAN_CFI_MASK)
1161 qp_params->vlan_id = 0;
1166 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1167 struct ib_qp_init_attr *attrs)
1169 struct qedr_device_attr *qattr = &dev->attr;
1171 /* QP0... attrs->qp_type == IB_QPT_GSI */
1172 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1173 DP_DEBUG(dev, QEDR_MSG_QP,
1174 "create qp: unsupported qp type=0x%x requested\n",
1179 if (attrs->cap.max_send_wr > qattr->max_sqe) {
1181 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1182 attrs->cap.max_send_wr, qattr->max_sqe);
1186 if (attrs->cap.max_inline_data > qattr->max_inline) {
1188 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1189 attrs->cap.max_inline_data, qattr->max_inline);
1193 if (attrs->cap.max_send_sge > qattr->max_sge) {
1195 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1196 attrs->cap.max_send_sge, qattr->max_sge);
1200 if (attrs->cap.max_recv_sge > qattr->max_sge) {
1202 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1203 attrs->cap.max_recv_sge, qattr->max_sge);
1207 /* Unprivileged user space cannot create special QP */
1208 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1210 "create qp: userspace can't create special QPs of type=0x%x\n",
1218 static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
1221 uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1222 uresp->rq_icid = qp->icid;
1225 static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
1228 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1229 uresp->sq_icid = qp->icid + 1;
1232 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1233 struct qedr_qp *qp, struct ib_udata *udata)
1235 struct qedr_create_qp_uresp uresp;
1238 memset(&uresp, 0, sizeof(uresp));
1239 qedr_copy_sq_uresp(&uresp, qp);
1240 qedr_copy_rq_uresp(&uresp, qp);
1242 uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1243 uresp.qp_id = qp->qp_id;
1245 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1248 "create qp: failed a copy to user space with qp icid=0x%x.\n",
1254 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1257 struct ib_qp_init_attr *attrs)
1259 spin_lock_init(&qp->q_lock);
1261 qp->qp_type = attrs->qp_type;
1262 qp->max_inline_data = attrs->cap.max_inline_data;
1263 qp->sq.max_sges = attrs->cap.max_send_sge;
1264 qp->state = QED_ROCE_QP_STATE_RESET;
1265 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1266 qp->sq_cq = get_qedr_cq(attrs->send_cq);
1267 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1269 qp->rq.max_sges = attrs->cap.max_recv_sge;
1271 DP_DEBUG(dev, QEDR_MSG_QP,
1272 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1273 qp->rq.max_sges, qp->rq_cq->icid);
1274 DP_DEBUG(dev, QEDR_MSG_QP,
1275 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1276 pd->pd_id, qp->qp_type, qp->max_inline_data,
1277 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1278 DP_DEBUG(dev, QEDR_MSG_QP,
1279 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1280 qp->sq.max_sges, qp->sq_cq->icid);
1283 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1285 qp->sq.db = dev->db_addr +
1286 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1287 qp->sq.db_data.data.icid = qp->icid + 1;
1288 qp->rq.db = dev->db_addr +
1289 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1290 qp->rq.db_data.data.icid = qp->icid;
1294 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1297 struct ib_qp_init_attr *attrs,
1298 bool fmr_and_reserved_lkey,
1299 struct qed_rdma_create_qp_in_params *params)
1301 /* QP handle to be written in an async event */
1302 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1303 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1305 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1306 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1307 params->pd = pd->pd_id;
1308 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1309 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1310 params->stats_queue = 0;
1311 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1313 params->use_srq = false;
1316 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1318 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1327 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1330 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1333 ib_umem_release(qp->usq.umem);
1334 qp->usq.umem = NULL;
1337 ib_umem_release(qp->urq.umem);
1338 qp->urq.umem = NULL;
1340 if (rdma_protocol_roce(&dev->ibdev, 1)) {
1341 qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
1342 qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
1344 kfree(qp->usq.pbl_tbl);
1345 kfree(qp->urq.pbl_tbl);
1349 static int qedr_create_user_qp(struct qedr_dev *dev,
1352 struct ib_udata *udata,
1353 struct ib_qp_init_attr *attrs)
1355 struct qed_rdma_create_qp_in_params in_params;
1356 struct qed_rdma_create_qp_out_params out_params;
1357 struct qedr_pd *pd = get_qedr_pd(ibpd);
1358 struct ib_ucontext *ib_ctx = NULL;
1359 struct qedr_ucontext *ctx = NULL;
1360 struct qedr_create_qp_ureq ureq;
1363 ib_ctx = ibpd->uobject->context;
1364 ctx = get_qedr_ucontext(ib_ctx);
1366 memset(&ureq, 0, sizeof(ureq));
1367 rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1369 DP_ERR(dev, "Problem copying data from user space\n");
1373 /* SQ - read access only (0), dma sync not required (0) */
1374 rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1379 /* RQ - read access only (0), dma sync not required (0) */
1380 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1386 memset(&in_params, 0, sizeof(in_params));
1387 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1388 in_params.qp_handle_lo = ureq.qp_handle_lo;
1389 in_params.qp_handle_hi = ureq.qp_handle_hi;
1390 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1391 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1392 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1393 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1395 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1396 &in_params, &out_params);
1403 qp->qp_id = out_params.qp_id;
1404 qp->icid = out_params.icid;
1406 rc = qedr_copy_qp_uresp(dev, qp, udata);
1410 qedr_qp_user_print(dev, qp);
1414 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1416 DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1419 qedr_cleanup_user(dev, qp);
1424 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1426 struct qed_rdma_create_qp_in_params *in_params,
1427 u32 n_sq_elems, u32 n_rq_elems)
1429 struct qed_rdma_create_qp_out_params out_params;
1432 rc = dev->ops->common->chain_alloc(dev->cdev,
1433 QED_CHAIN_USE_TO_PRODUCE,
1435 QED_CHAIN_CNT_TYPE_U32,
1437 QEDR_SQE_ELEMENT_SIZE,
1443 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1444 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1446 rc = dev->ops->common->chain_alloc(dev->cdev,
1447 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1449 QED_CHAIN_CNT_TYPE_U32,
1451 QEDR_RQE_ELEMENT_SIZE,
1456 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1457 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1459 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1460 in_params, &out_params);
1465 qp->qp_id = out_params.qp_id;
1466 qp->icid = out_params.icid;
1468 qedr_set_roce_db_info(dev, qp);
1473 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1475 dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1476 kfree(qp->wqe_wr_id);
1478 dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1479 kfree(qp->rqe_wr_id);
1482 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1485 struct ib_qp_init_attr *attrs)
1487 struct qed_rdma_create_qp_in_params in_params;
1488 struct qedr_pd *pd = get_qedr_pd(ibpd);
1494 memset(&in_params, 0, sizeof(in_params));
1496 /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1497 * the ring. The ring should allow at least a single WR, even if the
1498 * user requested none, due to allocation issues.
1499 * We should add an extra WR since the prod and cons indices of
1500 * wqe_wr_id are managed in such a way that the WQ is considered full
1501 * when (prod+1)%max_wr==cons. We currently don't do that because we
1502 * double the number of entries due to an iSER issue that pushes far more
1503 * WRs than indicated. If we decline its ib_post_send() then we get
1504 * error prints in dmesg that we'd like to avoid.
1506 qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1509 qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1511 if (!qp->wqe_wr_id) {
1512 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1516 /* QP handle to be written in CQE */
1517 in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1518 in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1520 /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1521 * the ring. The ring should allow at least a single WR, even if the
1522 * user requested none, due to allocation issues.
1524 qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1526 /* Allocate driver internal RQ array */
1527 qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1529 if (!qp->rqe_wr_id) {
1531 "create qp: failed RQ shadow memory allocation\n");
1532 kfree(qp->wqe_wr_id);
1536 qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1538 n_sq_entries = attrs->cap.max_send_wr;
1539 n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1540 n_sq_entries = max_t(u32, n_sq_entries, 1);
1541 n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1543 n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1545 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1546 n_sq_elems, n_rq_elems);
1548 qedr_cleanup_kernel(dev, qp);
1553 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1554 struct ib_qp_init_attr *attrs,
1555 struct ib_udata *udata)
1557 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1558 struct qedr_pd *pd = get_qedr_pd(ibpd);
1563 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1564 udata ? "user library" : "kernel", pd);
1566 rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1571 return ERR_PTR(-EINVAL);
1573 DP_DEBUG(dev, QEDR_MSG_QP,
1574 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1575 udata ? "user library" : "kernel", attrs->event_handler, pd,
1576 get_qedr_cq(attrs->send_cq),
1577 get_qedr_cq(attrs->send_cq)->icid,
1578 get_qedr_cq(attrs->recv_cq),
1579 get_qedr_cq(attrs->recv_cq)->icid);
1581 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1583 DP_ERR(dev, "create qp: failed allocating memory\n");
1584 return ERR_PTR(-ENOMEM);
1587 qedr_set_common_qp_params(dev, qp, pd, attrs);
1589 if (attrs->qp_type == IB_QPT_GSI) {
1590 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1597 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1599 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1604 qp->ibqp.qp_num = qp->qp_id;
1611 return ERR_PTR(-EFAULT);
1614 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1617 case QED_ROCE_QP_STATE_RESET:
1618 return IB_QPS_RESET;
1619 case QED_ROCE_QP_STATE_INIT:
1621 case QED_ROCE_QP_STATE_RTR:
1623 case QED_ROCE_QP_STATE_RTS:
1625 case QED_ROCE_QP_STATE_SQD:
1627 case QED_ROCE_QP_STATE_ERR:
1629 case QED_ROCE_QP_STATE_SQE:
1635 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1636 enum ib_qp_state qp_state)
1640 return QED_ROCE_QP_STATE_RESET;
1642 return QED_ROCE_QP_STATE_INIT;
1644 return QED_ROCE_QP_STATE_RTR;
1646 return QED_ROCE_QP_STATE_RTS;
1648 return QED_ROCE_QP_STATE_SQD;
1650 return QED_ROCE_QP_STATE_ERR;
1652 return QED_ROCE_QP_STATE_ERR;
1656 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1658 qed_chain_reset(&qph->pbl);
1662 qph->db_data.data.value = cpu_to_le16(0);
1665 static int qedr_update_qp_state(struct qedr_dev *dev,
1667 enum qed_roce_qp_state cur_state,
1668 enum qed_roce_qp_state new_state)
1672 if (new_state == cur_state)
1675 switch (cur_state) {
1676 case QED_ROCE_QP_STATE_RESET:
1677 switch (new_state) {
1678 case QED_ROCE_QP_STATE_INIT:
1679 qp->prev_wqe_size = 0;
1680 qedr_reset_qp_hwq_info(&qp->sq);
1681 qedr_reset_qp_hwq_info(&qp->rq);
1688 case QED_ROCE_QP_STATE_INIT:
1689 switch (new_state) {
1690 case QED_ROCE_QP_STATE_RTR:
1691 /* Update doorbell (in case post_recv was
1692 * done before move to RTR)
1695 writel(qp->rq.db_data.raw, qp->rq.db);
1696 /* Make sure write takes effect */
1699 case QED_ROCE_QP_STATE_ERR:
1702 /* Invalid state change. */
1707 case QED_ROCE_QP_STATE_RTR:
1709 switch (new_state) {
1710 case QED_ROCE_QP_STATE_RTS:
1712 case QED_ROCE_QP_STATE_ERR:
1715 /* Invalid state change. */
1720 case QED_ROCE_QP_STATE_RTS:
1722 switch (new_state) {
1723 case QED_ROCE_QP_STATE_SQD:
1725 case QED_ROCE_QP_STATE_ERR:
1728 /* Invalid state change. */
1733 case QED_ROCE_QP_STATE_SQD:
1735 switch (new_state) {
1736 case QED_ROCE_QP_STATE_RTS:
1737 case QED_ROCE_QP_STATE_ERR:
1740 /* Invalid state change. */
1745 case QED_ROCE_QP_STATE_ERR:
1747 switch (new_state) {
1748 case QED_ROCE_QP_STATE_RESET:
1749 if ((qp->rq.prod != qp->rq.cons) ||
1750 (qp->sq.prod != qp->sq.cons)) {
1752 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1753 qp->rq.prod, qp->rq.cons, qp->sq.prod,
1771 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1772 int attr_mask, struct ib_udata *udata)
1774 struct qedr_qp *qp = get_qedr_qp(ibqp);
1775 struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1776 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1777 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1778 enum ib_qp_state old_qp_state, new_qp_state;
1779 enum qed_roce_qp_state cur_state;
1782 DP_DEBUG(dev, QEDR_MSG_QP,
1783 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1786 old_qp_state = qedr_get_ibqp_state(qp->state);
1787 if (attr_mask & IB_QP_STATE)
1788 new_qp_state = attr->qp_state;
1790 new_qp_state = old_qp_state;
1792 if (!ib_modify_qp_is_ok
1793 (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
1794 IB_LINK_LAYER_ETHERNET)) {
1796 "modify qp: invalid attribute mask=0x%x specified for\n"
1797 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1798 attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
1804 /* Translate the masks... */
1805 if (attr_mask & IB_QP_STATE) {
1806 SET_FIELD(qp_params.modify_flags,
1807 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1808 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1811 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1812 qp_params.sqd_async = true;
1814 if (attr_mask & IB_QP_PKEY_INDEX) {
1815 SET_FIELD(qp_params.modify_flags,
1816 QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1817 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1822 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1825 if (attr_mask & IB_QP_QKEY)
1826 qp->qkey = attr->qkey;
1828 if (attr_mask & IB_QP_ACCESS_FLAGS) {
1829 SET_FIELD(qp_params.modify_flags,
1830 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1831 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1832 IB_ACCESS_REMOTE_READ;
1833 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1834 IB_ACCESS_REMOTE_WRITE;
1835 qp_params.incoming_atomic_en = attr->qp_access_flags &
1836 IB_ACCESS_REMOTE_ATOMIC;
1839 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1840 if (attr_mask & IB_QP_PATH_MTU) {
1841 if (attr->path_mtu < IB_MTU_256 ||
1842 attr->path_mtu > IB_MTU_4096) {
1843 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1847 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1848 ib_mtu_enum_to_int(iboe_get_mtu
1854 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1855 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1858 SET_FIELD(qp_params.modify_flags,
1859 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1861 qp_params.traffic_class_tos = grh->traffic_class;
1862 qp_params.flow_label = grh->flow_label;
1863 qp_params.hop_limit_ttl = grh->hop_limit;
1865 qp->sgid_idx = grh->sgid_index;
1867 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1870 "modify qp: problems with GID index %d (rc=%d)\n",
1871 grh->sgid_index, rc);
1875 rc = qedr_get_dmac(dev, &attr->ah_attr,
1876 qp_params.remote_mac_addr);
1880 qp_params.use_local_mac = true;
1881 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
1883 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
1884 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
1885 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
1886 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
1887 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
1888 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
1889 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
1890 qp_params.remote_mac_addr);
1892 qp_params.mtu = qp->mtu;
1893 qp_params.lb_indication = false;
1896 if (!qp_params.mtu) {
1897 /* Stay with current MTU */
1899 qp_params.mtu = qp->mtu;
1902 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1905 if (attr_mask & IB_QP_TIMEOUT) {
1906 SET_FIELD(qp_params.modify_flags,
1907 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
1909 /* The received timeout value is an exponent used like this:
1910 * "12.7.34 LOCAL ACK TIMEOUT
1911 * Value representing the transport (ACK) timeout for use by
1912 * the remote, expressed as: 4.096 * 2^timeout [usec]"
1913 * The FW expects timeout in msec so we need to divide the usec
1914 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
1915 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
1916 * The value of zero means infinite so we use a 'max_t' to make
1917 * sure that sub 1 msec values will be configured as 1 msec.
1920 qp_params.ack_timeout =
1921 1 << max_t(int, attr->timeout - 8, 0);
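/* e.g. attr->timeout = 14 yields 1 << 6 = 64 msec, approximating the
 * spec value of 4.096 usec * 2^14 ~= 67 msec.
 */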
1923 qp_params.ack_timeout = 0;
1925 qp->timeout = attr->timeout;
1928 if (attr_mask & IB_QP_RETRY_CNT) {
1929 SET_FIELD(qp_params.modify_flags,
1930 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
1931 qp_params.retry_cnt = attr->retry_cnt;
1934 if (attr_mask & IB_QP_RNR_RETRY) {
1935 SET_FIELD(qp_params.modify_flags,
1936 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
1937 qp_params.rnr_retry_cnt = attr->rnr_retry;
1940 if (attr_mask & IB_QP_RQ_PSN) {
1941 SET_FIELD(qp_params.modify_flags,
1942 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
1943 qp_params.rq_psn = attr->rq_psn;
1944 qp->rq_psn = attr->rq_psn;
1947 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1948 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
1951 "unsupported max_rd_atomic=%d, supported=%d\n",
1952 attr->max_rd_atomic,
1953 dev->attr.max_qp_req_rd_atomic_resc);
1957 SET_FIELD(qp_params.modify_flags,
1958 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
1959 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
1962 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1963 SET_FIELD(qp_params.modify_flags,
1964 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
1965 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
1968 if (attr_mask & IB_QP_SQ_PSN) {
1969 SET_FIELD(qp_params.modify_flags,
1970 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
1971 qp_params.sq_psn = attr->sq_psn;
1972 qp->sq_psn = attr->sq_psn;
1975 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1976 if (attr->max_dest_rd_atomic >
1977 dev->attr.max_qp_resp_rd_atomic_resc) {
1979 "unsupported max_dest_rd_atomic=%d, supported=%d\n",
1980 attr->max_dest_rd_atomic,
1981 dev->attr.max_qp_resp_rd_atomic_resc);
1987 SET_FIELD(qp_params.modify_flags,
1988 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
1989 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
1992 if (attr_mask & IB_QP_DEST_QPN) {
1993 SET_FIELD(qp_params.modify_flags,
1994 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
1996 qp_params.dest_qp = attr->dest_qp_num;
1997 qp->dest_qp_num = attr->dest_qp_num;
2000 cur_state = qp->state;
2002 /* Update the QP state before the actual ramrod to prevent a race with
2003 * fast path. Modifying the QP state to error will cause the device to
2004 flush the CQEs, and while polling, the flushed CQEs will be considered as
2005 * a potential issue if the QP isn't in error state.
2007 if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2008 !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2009 qp->state = QED_ROCE_QP_STATE_ERR;
2011 if (qp->qp_type != IB_QPT_GSI)
2012 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2013 qp->qed_qp, &qp_params);
2015 if (attr_mask & IB_QP_STATE) {
2016 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2017 rc = qedr_update_qp_state(dev, qp, cur_state,
2018 qp_params.new_state);
2019 qp->state = qp_params.new_state;
2026 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2028 int ib_qp_acc_flags = 0;
2030 if (params->incoming_rdma_write_en)
2031 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2032 if (params->incoming_rdma_read_en)
2033 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2034 if (params->incoming_atomic_en)
2035 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2036 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2037 return ib_qp_acc_flags;
2040 int qedr_query_qp(struct ib_qp *ibqp,
2041 struct ib_qp_attr *qp_attr,
2042 int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2044 struct qed_rdma_query_qp_out_params params;
2045 struct qedr_qp *qp = get_qedr_qp(ibqp);
2046 struct qedr_dev *dev = qp->dev;
2049 memset(&params, 0, sizeof(params));
2050 memset(qp_attr, 0, sizeof(*qp_attr));
2051 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2053 if (qp->qp_type != IB_QPT_GSI) {
2054 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2057 qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2059 qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS);
2062 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2063 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2064 qp_attr->path_mig_state = IB_MIG_MIGRATED;
2065 qp_attr->rq_psn = params.rq_psn;
2066 qp_attr->sq_psn = params.sq_psn;
2067 qp_attr->dest_qp_num = params.dest_qp;
2069 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2071 qp_attr->cap.max_send_wr = qp->sq.max_wr;
2072 qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2073 qp_attr->cap.max_send_sge = qp->sq.max_sges;
2074 qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2075 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2076 qp_init_attr->cap = qp_attr->cap;
2078 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2079 rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2080 params.flow_label, qp->sgid_idx,
2081 params.hop_limit_ttl, params.traffic_class_tos);
2082 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2083 rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2084 rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2085 qp_attr->timeout = qp->timeout;
2086 qp_attr->rnr_retry = params.rnr_retry;
2087 qp_attr->retry_cnt = params.retry_cnt;
2088 qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2089 qp_attr->pkey_index = params.pkey_index;
2090 qp_attr->port_num = 1;
2091 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2092 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2093 qp_attr->alt_pkey_index = 0;
2094 qp_attr->alt_port_num = 0;
2095 qp_attr->alt_timeout = 0;
2096 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2098 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2099 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2100 qp_attr->max_rd_atomic = params.max_rd_atomic;
2101 qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2103 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2104 qp_attr->cap.max_inline_data);
2110 int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2114 if (qp->qp_type != IB_QPT_GSI) {
2115 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2120 if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2121 qedr_cleanup_user(dev, qp);
2123 qedr_cleanup_kernel(dev, qp);
2128 int qedr_destroy_qp(struct ib_qp *ibqp)
2130 struct qedr_qp *qp = get_qedr_qp(ibqp);
2131 struct qedr_dev *dev = qp->dev;
2132 struct ib_qp_attr attr;
2136 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2139 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2140 (qp->state != QED_ROCE_QP_STATE_ERR) &&
2141 (qp->state != QED_ROCE_QP_STATE_INIT)) {
2143 attr.qp_state = IB_QPS_ERR;
2144 attr_mask |= IB_QP_STATE;
2146 /* Change the QP state to ERROR */
2147 qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2150 if (qp->qp_type == IB_QPT_GSI)
2151 qedr_destroy_gsi_qp(dev);
2153 qedr_free_qp_resources(dev, qp);
2160 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2161 struct ib_udata *udata)
2165 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2167 return ERR_PTR(-ENOMEM);
2174 int qedr_destroy_ah(struct ib_ah *ibah)
2176 struct qedr_ah *ah = get_qedr_ah(ibah);
2182 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2184 struct qedr_pbl *pbl, *tmp;
2186 if (info->pbl_table)
2187 list_add_tail(&info->pbl_table->list_entry,
2188 &info->free_pbl_list);
2190 if (!list_empty(&info->inuse_pbl_list))
2191 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2193 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2194 list_del(&pbl->list_entry);
2195 qedr_free_pbl(dev, &info->pbl_info, pbl);
2199 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2200 size_t page_list_len, bool two_layered)
2202 struct qedr_pbl *tmp;
2205 INIT_LIST_HEAD(&info->free_pbl_list);
2206 INIT_LIST_HEAD(&info->inuse_pbl_list);
2208 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2209 page_list_len, two_layered);
2213 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2214 if (IS_ERR(info->pbl_table)) {
2215 rc = PTR_ERR(info->pbl_table);
2219 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2220 &info->pbl_table->pa);
2222 /* In the usual case we use 2 PBLs, so we add one to the free
2223 * list and allocate another one
2225 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2227 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2231 list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2233 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2237 free_mr_info(dev, info);
2242 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2243 u64 usr_addr, int acc, struct ib_udata *udata)
2245 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2250 pd = get_qedr_pd(ibpd);
2251 DP_DEBUG(dev, QEDR_MSG_MR,
2252 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2253 pd->pd_id, start, len, usr_addr, acc);
2255 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2256 return ERR_PTR(-EINVAL);
2258 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2262 mr->type = QEDR_MR_USER;
2264 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2265 if (IS_ERR(mr->umem)) {
2270 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2274 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2275 &mr->info.pbl_info, mr->umem->page_shift);
2277 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2279 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2283 /* Index only, 18 bit long, lkey = itid << 8 | key */
2284 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2286 mr->hw_mr.pd = pd->pd_id;
2287 mr->hw_mr.local_read = 1;
2288 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2289 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2290 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2291 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2292 mr->hw_mr.mw_bind = false;
2293 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2294 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2295 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2296 mr->hw_mr.page_size_log = mr->umem->page_shift;
2297 mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2298 mr->hw_mr.length = len;
2299 mr->hw_mr.vaddr = usr_addr;
2300 mr->hw_mr.zbva = false;
2301 mr->hw_mr.phy_mr = false;
2302 mr->hw_mr.dma_mr = false;
2304 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2306 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2310 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2311 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2312 mr->hw_mr.remote_atomic)
2313 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2315 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2320 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2322 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2328 int qedr_dereg_mr(struct ib_mr *ib_mr)
2330 struct qedr_mr *mr = get_qedr_mr(ib_mr);
2331 struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2334 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2338 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2340 if (mr->type != QEDR_MR_DMA)
2341 free_mr_info(dev, &mr->info);
2343 /* it could be user registered memory. */
2345 ib_umem_release(mr->umem);
2352 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2353 int max_page_list_len)
2355 struct qedr_pd *pd = get_qedr_pd(ibpd);
2356 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2360 DP_DEBUG(dev, QEDR_MSG_MR,
2361 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2364 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2369 mr->type = QEDR_MR_FRMR;
2371 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2375 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2377 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2381 /* Index only, 18 bit long, lkey = itid << 8 | key */
2382 mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2384 mr->hw_mr.pd = pd->pd_id;
2385 mr->hw_mr.local_read = 1;
2386 mr->hw_mr.local_write = 0;
2387 mr->hw_mr.remote_read = 0;
2388 mr->hw_mr.remote_write = 0;
2389 mr->hw_mr.remote_atomic = 0;
2390 mr->hw_mr.mw_bind = false;
2391 mr->hw_mr.pbl_ptr = 0;
2392 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2393 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2395 mr->hw_mr.length = 0;
2396 mr->hw_mr.vaddr = 0;
2397 mr->hw_mr.zbva = false;
2398 mr->hw_mr.phy_mr = true;
2399 mr->hw_mr.dma_mr = false;
2401 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2403 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2407 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2408 mr->ibmr.rkey = mr->ibmr.lkey;
2410 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2414 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2420 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2421 enum ib_mr_type mr_type, u32 max_num_sg)
2423 struct qedr_dev *dev;
2426 if (mr_type != IB_MR_TYPE_MEM_REG)
2427 return ERR_PTR(-EINVAL);
2429 mr = __qedr_alloc_mr(ibpd, max_num_sg);
2432 return ERR_PTR(-EINVAL);
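/* ib_sg_to_pages() callback: store one page address in the MR's PBL.
 * Each PBL page holds pbl_size / sizeof(u64) entries, so the running
 * page count selects the PBL table page first and then the entry inside
 * it; the address is written as little-endian lo/hi 32-bit halves.
 */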
2439 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2441 struct qedr_mr *mr = get_qedr_mr(ibmr);
2442 struct qedr_pbl *pbl_table;
2443 struct regpair *pbe;
2446 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2447 DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2451 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2454 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2455 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2456 pbe = (struct regpair *)pbl_table->va;
2457 pbe += mr->npages % pbes_in_page;
2458 pbe->lo = cpu_to_le32((u32)addr);
2459 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
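/* Recycle PBLs of fast-reg MRs whose registrations have already
 * completed: move up to (completed - completed_handled - 1) page lists
 * from the in-use list back to the free list so they can be reused by
 * subsequent registrations.
 */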
2466 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2468 int work = info->completed - info->completed_handled - 1;
2470 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2471 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2472 struct qedr_pbl *pbl;
2474 /* Free all the page lists that can be freed
2475  * (all the ones that were invalidated), under the assumption
2476  * that if an FMR completed successfully, then any invalidate
2477  * operation posted before it has completed as well.
2479 pbl = list_first_entry(&info->inuse_pbl_list,
2480 struct qedr_pbl, list_entry);
2481 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2482 info->completed_handled++;
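/* Map an SG list onto a previously allocated fast-reg MR: first reclaim
 * any PBLs freed up by completed registrations, then let
 * ib_sg_to_pages() feed each page address into qedr_set_page().
 */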
2486 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2487 int sg_nents, unsigned int *sg_offset)
2489 struct qedr_mr *mr = get_qedr_mr(ibmr);
2493 handle_completed_mrs(mr->dev, &mr->info);
2494 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
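/* A DMA MR spans the whole address space with the requested access
 * rights; no PBL is needed, only a TID registered with dma_mr set.
 */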
2497 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2499 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2500 struct qedr_pd *pd = get_qedr_pd(ibpd);
2504 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2506 return ERR_PTR(-ENOMEM);
2508 mr->type = QEDR_MR_DMA;
2510 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2512 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2516 /* index only, 18 bit long, lkey = itid << 8 | key */
2517 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2518 mr->hw_mr.pd = pd->pd_id;
2519 mr->hw_mr.local_read = 1;
2520 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2521 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2522 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2523 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2524 mr->hw_mr.dma_mr = true;
2526 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2528 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2532 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2533 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2534 mr->hw_mr.remote_atomic)
2535 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2537 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2541 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
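/* A work queue is full when advancing the producer by one (modulo
 * max_wr) would make it equal to the consumer; one slot is left unused
 * so that a full queue can be told apart from an empty one.
 */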
2547 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2549 return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2552 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2556 for (i = 0; i < num_sge; i++)
2557 len += sg_list[i].length;
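/* Byte-swap every 64-bit word of one SQ WQE element. Composing
 * cpu_to_be64() with cpu_to_le64() amounts to an unconditional swab64()
 * regardless of host endianness, presumably putting inline data that was
 * copied in as raw bytes into the byte order the device expects.
 */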
2562 static void swap_wqe_data64(u64 *p)
2566 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2567 *p = cpu_to_be64(cpu_to_le64(*p));
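/* Copy the WR's payload directly into the SQ WQE (IB_SEND_INLINE).
 * Data is packed into successive SQ chain elements ("segments"):
 * whenever the current segment is exhausted a new element is produced
 * from the chain, and every fully filled segment is byte-swapped with
 * swap_wqe_data64(). WRs whose total inline data exceeds
 * ROCE_REQ_MAX_INLINE_DATA_SIZE are rejected.
 */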
2570 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2571 struct qedr_qp *qp, u8 *wqe_size,
2572 struct ib_send_wr *wr,
2573 struct ib_send_wr **bad_wr, u8 *bits,
2576 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2577 char *seg_prt, *wqe;
2580 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2581 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2595 /* Copy data inline */
2596 for (i = 0; i < wr->num_sge; i++) {
2597 u32 len = wr->sg_list[i].length;
2598 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2603 /* New segment required */
2605 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2607 seg_siz = sizeof(struct rdma_sq_common_wqe);
2611 /* Calculate currently allowed length */
2612 cur = min_t(u32, len, seg_siz);
2613 memcpy(seg_prt, src, cur);
2615 /* Update segment variables */
2619 /* Update sge variables */
2623 /* Swap fully-completed segments */
2625 swap_wqe_data64((u64 *)wqe);
2629 /* Swap the last, not fully completed segment */
2631 swap_wqe_data64((u64 *)wqe);
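/* Helpers for filling receive-queue and SRQ elements: buffer addresses
 * are written as little-endian register pairs, while lengths, keys and
 * flags are written as little-endian 32-bit values.
 */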
2636 #define RQ_SGE_SET(sge, vaddr, vlength, vflags) \
2638 DMA_REGPAIR_LE(sge->addr, vaddr); \
2639 (sge)->length = cpu_to_le32(vlength); \
2640 (sge)->flags = cpu_to_le32(vflags); \
2643 #define SRQ_HDR_SET(hdr, vwr_id, num_sge) \
2645 DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \
2646 (hdr)->num_sges = num_sge; \
2649 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \
2651 DMA_REGPAIR_LE(sge->addr, vaddr); \
2652 (sge)->length = cpu_to_le32(vlength); \
2653 (sge)->l_key = cpu_to_le32(vlkey); \
2656 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2657 struct ib_send_wr *wr)
2662 for (i = 0; i < wr->num_sge; i++) {
2663 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2665 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2666 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2667 sge->length = cpu_to_le32(wr->sg_list[i].length);
2668 data_size += wr->sg_list[i].length;
2672 *wqe_size += wr->num_sge;
2677 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2679 struct rdma_sq_rdma_wqe_1st *rwqe,
2680 struct rdma_sq_rdma_wqe_2nd *rwqe2,
2681 struct ib_send_wr *wr,
2682 struct ib_send_wr **bad_wr)
2684 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2685 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2687 if (wr->send_flags & IB_SEND_INLINE &&
2688 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2689 wr->opcode == IB_WR_RDMA_WRITE)) {
2692 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2693 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2694 bad_wr, &rwqe->flags, flags);
2697 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2700 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2702 struct rdma_sq_send_wqe_1st *swqe,
2703 struct rdma_sq_send_wqe_2st *swqe2,
2704 struct ib_send_wr *wr,
2705 struct ib_send_wr **bad_wr)
2707 memset(swqe2, 0, sizeof(*swqe2));
2708 if (wr->send_flags & IB_SEND_INLINE) {
2711 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2712 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2713 bad_wr, &swqe->flags, flags);
2716 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
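/* Build the two-element fast-MR work request (IB_WR_REG_MR): the first
 * element carries the iova and the key, the second the access rights,
 * page size and the address of the MR's PBL. The MR pointer is stashed
 * in wqe_wr_id so its 'completed' counter can be bumped at poll time.
 */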
2719 static int qedr_prepare_reg(struct qedr_qp *qp,
2720 struct rdma_sq_fmr_wqe_1st *fwqe1,
2721 struct ib_reg_wr *wr)
2723 struct qedr_mr *mr = get_qedr_mr(wr->mr);
2724 struct rdma_sq_fmr_wqe_2nd *fwqe2;
2726 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2727 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2728 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2729 fwqe1->l_key = wr->key;
2731 fwqe2->access_ctrl = 0;
2733 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2734 !!(wr->access & IB_ACCESS_REMOTE_READ));
2735 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2736 !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2737 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2738 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2739 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2740 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2741 !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2742 fwqe2->fmr_ctrl = 0;
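/* The page size field appears to be encoded relative to 4KB, hence
 * ilog2(page_size) - 12 (0 means 4KB pages).
 */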
2744 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2745 ilog2(mr->ibmr.page_size) - 12);
2747 fwqe2->length_hi = 0;
2748 fwqe2->length_lo = mr->ibmr.length;
2749 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2750 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2752 qp->wqe_wr_id[qp->sq.prod].mr = mr;
2757 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2760 case IB_WR_RDMA_WRITE:
2761 case IB_WR_RDMA_WRITE_WITH_IMM:
2762 return IB_WC_RDMA_WRITE;
2763 case IB_WR_SEND_WITH_IMM:
2765 case IB_WR_SEND_WITH_INV:
2767 case IB_WR_RDMA_READ:
2768 return IB_WC_RDMA_READ;
2769 case IB_WR_ATOMIC_CMP_AND_SWP:
2770 return IB_WC_COMP_SWAP;
2771 case IB_WR_ATOMIC_FETCH_AND_ADD:
2772 return IB_WC_FETCH_ADD;
2774 return IB_WC_REG_MR;
2775 case IB_WR_LOCAL_INV:
2776 return IB_WC_LOCAL_INV;
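/* Check that a send WR can be accepted: it must not carry more SGEs
 * than the SQ supports, the SQ ring must not be full, and the SQ PBL
 * must have room for a maximum-sized WQE. Each failure reason is
 * reported only once per QP, tracked through qp->err_bitmap.
 */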
2782 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2784 int wq_is_full, err_wr, pbl_is_full;
2785 struct qedr_dev *dev = qp->dev;
2787 /* prevent SQ overflow and/or processing of a bad WR */
2788 err_wr = wr->num_sge > qp->sq.max_sges;
2789 wq_is_full = qedr_wq_is_full(&qp->sq);
2790 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2791 QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2792 if (wq_is_full || err_wr || pbl_is_full) {
2793 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2795 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2797 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2800 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2802 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2804 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2808 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2810 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2812 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2819 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2820 struct ib_send_wr **bad_wr)
2822 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2823 struct qedr_qp *qp = get_qedr_qp(ibqp);
2824 struct rdma_sq_atomic_wqe_1st *awqe1;
2825 struct rdma_sq_atomic_wqe_2nd *awqe2;
2826 struct rdma_sq_atomic_wqe_3rd *awqe3;
2827 struct rdma_sq_send_wqe_2st *swqe2;
2828 struct rdma_sq_local_inv_wqe *iwqe;
2829 struct rdma_sq_rdma_wqe_2nd *rwqe2;
2830 struct rdma_sq_send_wqe_1st *swqe;
2831 struct rdma_sq_rdma_wqe_1st *rwqe;
2832 struct rdma_sq_fmr_wqe_1st *fwqe1;
2833 struct rdma_sq_common_wqe *wqe;
2838 if (!qedr_can_post_send(qp, wr)) {
2843 wqe = qed_chain_produce(&qp->sq.pbl);
2844 qp->wqe_wr_id[qp->sq.prod].signaled =
2845 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2848 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2849 !!(wr->send_flags & IB_SEND_SOLICITED));
2850 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
2851 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2852 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2853 !!(wr->send_flags & IB_SEND_FENCE));
2854 wqe->prev_wqe_size = qp->prev_wqe_size;
2856 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2858 switch (wr->opcode) {
2859 case IB_WR_SEND_WITH_IMM:
2860 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2865 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
2866 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2868 swqe2 = qed_chain_produce(&qp->sq.pbl);
2870 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
2871 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2873 swqe->length = cpu_to_le32(length);
2874 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2875 qp->prev_wqe_size = swqe->wqe_size;
2876 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2879 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
2880 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2883 swqe2 = qed_chain_produce(&qp->sq.pbl);
2884 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2886 swqe->length = cpu_to_le32(length);
2887 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2888 qp->prev_wqe_size = swqe->wqe_size;
2889 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2891 case IB_WR_SEND_WITH_INV:
2892 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
2893 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
2894 swqe2 = qed_chain_produce(&qp->sq.pbl);
2896 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
2897 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
2899 swqe->length = cpu_to_le32(length);
2900 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
2901 qp->prev_wqe_size = swqe->wqe_size;
2902 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
2905 case IB_WR_RDMA_WRITE_WITH_IMM:
2906 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2911 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
2912 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2915 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
2916 rwqe2 = qed_chain_produce(&qp->sq.pbl);
2917 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2919 rwqe->length = cpu_to_le32(length);
2920 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2921 qp->prev_wqe_size = rwqe->wqe_size;
2922 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2924 case IB_WR_RDMA_WRITE:
2925 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
2926 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2929 rwqe2 = qed_chain_produce(&qp->sq.pbl);
2930 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2932 rwqe->length = cpu_to_le32(length);
2933 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2934 qp->prev_wqe_size = rwqe->wqe_size;
2935 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2937 case IB_WR_RDMA_READ_WITH_INV:
2939 "RDMA READ WITH INVALIDATE not supported\n");
2944 case IB_WR_RDMA_READ:
2945 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
2946 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
2949 rwqe2 = qed_chain_produce(&qp->sq.pbl);
2950 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
2952 rwqe->length = cpu_to_le32(length);
2953 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
2954 qp->prev_wqe_size = rwqe->wqe_size;
2955 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
2958 case IB_WR_ATOMIC_CMP_AND_SWP:
2959 case IB_WR_ATOMIC_FETCH_AND_ADD:
2960 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
2961 awqe1->wqe_size = 4;
2963 awqe2 = qed_chain_produce(&qp->sq.pbl);
2964 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
2965 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
2967 awqe3 = qed_chain_produce(&qp->sq.pbl);
2969 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2970 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
2971 DMA_REGPAIR_LE(awqe3->swap_data,
2972 atomic_wr(wr)->compare_add);
2974 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
2975 DMA_REGPAIR_LE(awqe3->swap_data,
2976 atomic_wr(wr)->swap);
2977 DMA_REGPAIR_LE(awqe3->cmp_data,
2978 atomic_wr(wr)->compare_add);
2981 qedr_prepare_sq_sges(qp, NULL, wr);
2983 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
2984 qp->prev_wqe_size = awqe1->wqe_size;
2987 case IB_WR_LOCAL_INV:
2988 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
2991 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
2992 iwqe->inv_l_key = wr->ex.invalidate_rkey;
2993 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
2994 qp->prev_wqe_size = iwqe->wqe_size;
2997 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
2998 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
2999 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3000 fwqe1->wqe_size = 2;
3002 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3004 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3009 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3010 qp->prev_wqe_size = fwqe1->wqe_size;
3013 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3022 /* Restore prod to its position before
3023 * this WR was processed
3025 value = le16_to_cpu(qp->sq.db_data.data.value);
3026 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3028 /* Restore prev_wqe_size */
3029 qp->prev_wqe_size = wqe->prev_wqe_size;
3031 DP_ERR(dev, "POST SEND FAILED\n");
3037 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3038 struct ib_send_wr **bad_wr)
3040 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3041 struct qedr_qp *qp = get_qedr_qp(ibqp);
3042 unsigned long flags;
3047 if (qp->qp_type == IB_QPT_GSI)
3048 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3050 spin_lock_irqsave(&qp->q_lock, flags);
3052 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3053 (qp->state != QED_ROCE_QP_STATE_ERR) &&
3054 (qp->state != QED_ROCE_QP_STATE_SQD)) {
3055 spin_unlock_irqrestore(&qp->q_lock, flags);
3057 DP_DEBUG(dev, QEDR_MSG_CQ,
3058 "QP in wrong state! QP icid=0x%x state %d\n",
3059 qp->icid, qp->state);
3064 rc = __qedr_post_send(ibqp, wr, bad_wr);
3068 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3070 qedr_inc_sw_prod(&qp->sq);
3072 qp->sq.db_data.data.value++;
3078 * If there was a failure in the first WR then it will be triggered in
3079 * vain. However, this is not harmful (as long as the producer value is
3080 * unchanged). For performance reasons we avoid checking for this
3081 * redundant doorbell.
3084 writel(qp->sq.db_data.raw, qp->sq.db);
3086 /* Make sure write sticks */
3089 spin_unlock_irqrestore(&qp->q_lock, flags);
3094 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3095 struct ib_recv_wr **bad_wr)
3097 struct qedr_qp *qp = get_qedr_qp(ibqp);
3098 struct qedr_dev *dev = qp->dev;
3099 unsigned long flags;
3102 if (qp->qp_type == IB_QPT_GSI)
3103 return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3105 spin_lock_irqsave(&qp->q_lock, flags);
3107 if (qp->state == QED_ROCE_QP_STATE_RESET) {
3108 spin_unlock_irqrestore(&qp->q_lock, flags);
3116 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3117 QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3118 wr->num_sge > qp->rq.max_sges) {
3119 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3120 qed_chain_get_elem_left_u32(&qp->rq.pbl),
3121 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3127 for (i = 0; i < wr->num_sge; i++) {
3129 struct rdma_rq_sge *rqe =
3130 qed_chain_produce(&qp->rq.pbl);
3132 /* First one must include the number
3133 * of SGE in the list
3136 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3139 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3140 wr->sg_list[i].lkey);
3142 RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3143 wr->sg_list[i].length, flags);
3146 /* Special case of no sges. FW requires between 1 and 4 sges...
3147  * In this case we need to post 1 sge with length zero. This is
3148  * because an RDMA write with immediate consumes an RQ entry.
3152 struct rdma_rq_sge *rqe =
3153 qed_chain_produce(&qp->rq.pbl);
3155 /* First one must include the number
3156 * of SGE in the list
3158 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3159 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3161 RQ_SGE_SET(rqe, 0, 0, flags);
3165 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3166 qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3168 qedr_inc_sw_prod(&qp->rq);
3170 /* Flush all the writes before signalling doorbell */
3173 qp->rq.db_data.data.value++;
3175 writel(qp->rq.db_data.raw, qp->rq.db);
3177 /* Make sure write sticks */
3183 spin_unlock_irqrestore(&qp->q_lock, flags);
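/* CQE ownership is tracked with a toggle bit that flips every time the
 * CQ chain wraps: a CQE is valid for software only while its toggle bit
 * matches the CQ's current phase. The qedr_qp software pointer is handed
 * to the device (at QP creation) and echoed back in each CQE's qp_handle
 * hi/lo pair, so cqe_get_qp() can recover it directly from a completion.
 */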
3188 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3190 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3192 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3196 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3198 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3201 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3202 resp_cqe->qp_handle.lo,
3207 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3209 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3211 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3214 /* Return latest CQE (needs processing) */
3215 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3217 return cq->latest_cqe;
3220 /* For FMRs we need to increase the 'completed' counter used by the FMR
3221  * algorithm that determines whether a pbl can be freed or not.
3222  * We need to do this whether the work request was signaled or not. For
3223  * this purpose we call this function from the condition that checks if a wr
3224  * should be skipped, to make sure we don't miss it (possibly this fmr
3225  * operation was not signaled).
3227 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3229 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3230 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
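/* Drain SQ completions for one QP up to hw_cons. Unsignaled WRs are
 * skipped (while still updating the FMR 'completed' counter via
 * qedr_chk_if_fmr()) unless 'force' is set, as it is when flushing.
 * Returns the number of work completions written to 'wc'.
 */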
3233 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3234 struct qedr_cq *cq, int num_entries,
3235 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3240 while (num_entries && qp->sq.wqe_cons != hw_cons) {
3241 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3242 qedr_chk_if_fmr(qp);
3248 wc->status = status;
3251 wc->src_qp = qp->id;
3254 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3255 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3257 switch (wc->opcode) {
3258 case IB_WC_RDMA_WRITE:
3259 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3261 case IB_WC_COMP_SWAP:
3262 case IB_WC_FETCH_ADD:
3266 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3268 case IB_WC_RDMA_READ:
3270 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3280 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3281 qed_chain_consume(&qp->sq.pbl);
3282 qedr_inc_sw_cons(&qp->sq);
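/* Handle a requester (SQ) CQE. On success, or on a flush error, all
 * WRs up to the CQE's sq_cons are completed with that status. For any
 * other error the QP is moved to the error state, the WRs preceding the
 * failing one are completed as successful, and the failure itself is
 * reported with the corresponding IB work-completion status.
 */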
3288 static int qedr_poll_cq_req(struct qedr_dev *dev,
3289 struct qedr_qp *qp, struct qedr_cq *cq,
3290 int num_entries, struct ib_wc *wc,
3291 struct rdma_cqe_requester *req)
3295 switch (req->status) {
3296 case RDMA_CQE_REQ_STS_OK:
3297 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3300 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3301 if (qp->state != QED_ROCE_QP_STATE_ERR)
3303 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3304 cq->icid, qp->icid);
3305 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3306 IB_WC_WR_FLUSH_ERR, 1);
3309 /* process all WQEs before the consumer */
3310 qp->state = QED_ROCE_QP_STATE_ERR;
3311 cnt = process_req(dev, qp, cq, num_entries, wc,
3312 req->sq_cons - 1, IB_WC_SUCCESS, 0);
3314 /* if we have extra WC fill it with actual error info */
3315 if (cnt < num_entries) {
3316 enum ib_wc_status wc_status;
3318 switch (req->status) {
3319 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3321 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3322 cq->icid, qp->icid);
3323 wc_status = IB_WC_BAD_RESP_ERR;
3325 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3327 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3328 cq->icid, qp->icid);
3329 wc_status = IB_WC_LOC_LEN_ERR;
3331 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3333 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3334 cq->icid, qp->icid);
3335 wc_status = IB_WC_LOC_QP_OP_ERR;
3337 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3339 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3340 cq->icid, qp->icid);
3341 wc_status = IB_WC_LOC_PROT_ERR;
3343 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3345 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3346 cq->icid, qp->icid);
3347 wc_status = IB_WC_MW_BIND_ERR;
3349 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3351 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3352 cq->icid, qp->icid);
3353 wc_status = IB_WC_REM_INV_REQ_ERR;
3355 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3357 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3358 cq->icid, qp->icid);
3359 wc_status = IB_WC_REM_ACCESS_ERR;
3361 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3363 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3364 cq->icid, qp->icid);
3365 wc_status = IB_WC_REM_OP_ERR;
3367 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3369 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3370 cq->icid, qp->icid);
3371 wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3373 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3375 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3376 cq->icid, qp->icid);
3377 wc_status = IB_WC_RETRY_EXC_ERR;
3381 "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3382 cq->icid, qp->icid);
3383 wc_status = IB_WC_GENERAL_ERR;
3385 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3393 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3396 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3397 return IB_WC_LOC_ACCESS_ERR;
3398 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3399 return IB_WC_LOC_LEN_ERR;
3400 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3401 return IB_WC_LOC_QP_OP_ERR;
3402 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3403 return IB_WC_LOC_PROT_ERR;
3404 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3405 return IB_WC_MW_BIND_ERR;
3406 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3407 return IB_WC_REM_INV_RD_REQ_ERR;
3408 case RDMA_CQE_RESP_STS_OK:
3409 return IB_WC_SUCCESS;
3411 return IB_WC_GENERAL_ERR;
3415 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3418 wc->status = IB_WC_SUCCESS;
3419 wc->byte_len = le32_to_cpu(resp->length);
3421 if (resp->flags & QEDR_RESP_IMM) {
3422 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3423 wc->wc_flags |= IB_WC_WITH_IMM;
3425 if (resp->flags & QEDR_RESP_RDMA)
3426 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3428 if (resp->flags & QEDR_RESP_INV)
3431 } else if (resp->flags & QEDR_RESP_INV) {
3432 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3433 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3435 if (resp->flags & QEDR_RESP_RDMA)
3438 } else if (resp->flags & QEDR_RESP_RDMA) {
3445 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3446 struct qedr_cq *cq, struct ib_wc *wc,
3447 struct rdma_cqe_responder *resp, u64 wr_id)
3449 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3450 wc->opcode = IB_WC_RECV;
3453 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3454 if (qedr_set_ok_cqe_resp_wc(resp, wc))
3456 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3457 cq, cq->icid, resp->flags);
3460 wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3461 if (wc->status == IB_WC_GENERAL_ERR)
3463 "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3464 cq, cq->icid, resp->status);
3467 /* Fill the rest of the WC */
3469 wc->src_qp = qp->id;
3474 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3475 struct qedr_cq *cq, struct ib_wc *wc,
3476 struct rdma_cqe_responder *resp)
3478 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3480 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
3482 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3483 qed_chain_consume(&qp->rq.pbl);
3484 qedr_inc_sw_cons(&qp->rq);
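/* Flush the RQ: generate IB_WC_WR_FLUSH_ERR completions for every
 * outstanding receive WR up to hw_cons, consuming the corresponding
 * chain elements along the way.
 */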
3489 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3490 int num_entries, struct ib_wc *wc, u16 hw_cons)
3494 while (num_entries && qp->rq.wqe_cons != hw_cons) {
3496 wc->status = IB_WC_WR_FLUSH_ERR;
3499 wc->src_qp = qp->id;
3501 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3506 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3507 qed_chain_consume(&qp->rq.pbl);
3508 qedr_inc_sw_cons(&qp->rq);
3514 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3515 struct rdma_cqe_responder *resp, int *update)
3517 if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3523 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3524 struct qedr_cq *cq, int num_entries,
3525 struct ib_wc *wc, struct rdma_cqe_responder *resp,
3530 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3531 cnt = process_resp_flush(qp, cq, num_entries, wc,
3533 try_consume_resp_cqe(cq, qp, resp, update);
3535 cnt = process_resp_one(dev, qp, cq, wc, resp);
3543 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3544 struct rdma_cqe_requester *req, int *update)
3546 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3552 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3554 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3555 struct qedr_cq *cq = get_qedr_cq(ibcq);
3556 union rdma_cqe *cqe;
3557 u32 old_cons, new_cons;
3558 unsigned long flags;
3562 if (cq->destroyed) {
3564 "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3569 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3570 return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3572 spin_lock_irqsave(&cq->cq_lock, flags);
3573 cqe = cq->latest_cqe;
3574 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3575 while (num_entries && is_valid_cqe(cq, cqe)) {
3579 /* prevent speculative reads of any field of CQE */
3582 qp = cqe_get_qp(cqe);
3584 WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3590 switch (cqe_get_type(cqe)) {
3591 case RDMA_CQE_TYPE_REQUESTER:
3592 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3594 try_consume_req_cqe(cq, qp, &cqe->req, &update);
3596 case RDMA_CQE_TYPE_RESPONDER_RQ:
3597 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3598 &cqe->resp, &update);
3600 case RDMA_CQE_TYPE_INVALID:
3602 DP_ERR(dev, "Error: invalid CQE type = %d\n",
3611 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3613 cq->cq_cons += new_cons - old_cons;
3616 /* doorbell notifies about the latest VALID entry,
3617  * but the chain already points to the next INVALID one
3619 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3621 spin_unlock_irqrestore(&cq->cq_lock, flags);
3625 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3627 const struct ib_wc *in_wc,
3628 const struct ib_grh *in_grh,
3629 const struct ib_mad_hdr *mad_hdr,
3630 size_t in_mad_size, struct ib_mad_hdr *out_mad,
3631 size_t *out_mad_size, u16 *out_mad_pkey_index)
3633 struct qedr_dev *dev = get_qedr_dev(ibdev);
3635 DP_DEBUG(dev, QEDR_MSG_GSI,
3636 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3637 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3638 mad_hdr->class_specific, mad_hdr->class_version,
3639 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3640 return IB_MAD_RESULT_SUCCESS;
3643 int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
3644 struct ib_port_immutable *immutable)
3646 struct ib_port_attr attr;
3649 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
3650 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
3652 err = ib_query_port(ibdev, port_num, &attr);
3656 immutable->pkey_tbl_len = attr.pkey_tbl_len;
3657 immutable->gid_tbl_len = attr.gid_tbl_len;
3658 immutable->max_mad_size = IB_MGMT_MAD_SIZE;