drivers/infiniband/sw/rdmavt/cq.c

   1 /*
   2  * Copyright(c) 2016 - 2018 Intel Corporation.
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of version 2 of the GNU General Public License as
  11  * published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * General Public License for more details.
  17  *
  18  * BSD LICENSE
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  *
  24  *  - Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  *  - Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in
  28  *    the documentation and/or other materials provided with the
  29  *    distribution.
  30  *  - Neither the name of Intel Corporation nor the names of its
  31  *    contributors may be used to endorse or promote products derived
  32  *    from this software without specific prior written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45  *
  46  */
  47
  48 #include <linux/slab.h>
  49 #include <linux/vmalloc.h>
  50 #include "cq.h"
  51 #include "vt.h"
  52 #include "trace.h"
  53
/*
 * Workqueue that runs CQ completion work: rvt_cq_enter() queues
 * cq->comptask on it, rvt_driver_cq_init() allocates it and
 * rvt_cq_exit() destroys it.
 */
static struct workqueue_struct *comp_vector_wq;
  55
  56 /**
  57  * rvt_cq_enter - add a new entry to the completion queue
  58  * @cq: completion queue
  59  * @entry: work completion entry to add
  60  * @solicited: true if @entry is solicited
  61  *
  62  * This may be called with qp->s_lock held.
  63  */
  64 void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
  65 {
  66         struct rvt_cq_wc *wc;
  67         unsigned long flags;
  68         u32 head;
  69         u32 next;
  70
  71         spin_lock_irqsave(&cq->lock, flags);
  72
  73         /*
  74          * Note that the head pointer might be writable by user processes.
  75          * Take care to verify it is a sane value.
  76          */
  77         wc = cq->queue;
  78         head = wc->head;
  79         if (head >= (unsigned)cq->ibcq.cqe) {
  80                 head = cq->ibcq.cqe;
  81                 next = 0;
  82         } else {
  83                 next = head + 1;
  84         }
  85
  86         if (unlikely(next == wc->tail)) {
  87                 spin_unlock_irqrestore(&cq->lock, flags);
  88                 if (cq->ibcq.event_handler) {
  89                         struct ib_event ev;
  90
  91                         ev.device = cq->ibcq.device;
  92                         ev.element.cq = &cq->ibcq;
  93                         ev.event = IB_EVENT_CQ_ERR;
  94                         cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
  95                 }
  96                 return;
  97         }
  98         trace_rvt_cq_enter(cq, entry, head);
  99         if (cq->ip) {
 100                 wc->uqueue[head].wr_id = entry->wr_id;
 101                 wc->uqueue[head].status = entry->status;
 102                 wc->uqueue[head].opcode = entry->opcode;
 103                 wc->uqueue[head].vendor_err = entry->vendor_err;
 104                 wc->uqueue[head].byte_len = entry->byte_len;
 105                 wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
 106                 wc->uqueue[head].qp_num = entry->qp->qp_num;
 107                 wc->uqueue[head].src_qp = entry->src_qp;
 108                 wc->uqueue[head].wc_flags = entry->wc_flags;
 109                 wc->uqueue[head].pkey_index = entry->pkey_index;
 110                 wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
 111                 wc->uqueue[head].sl = entry->sl;
 112                 wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
 113                 wc->uqueue[head].port_num = entry->port_num;
 114                 /* Make sure entry is written before the head index. */
 115                 smp_wmb();
 116         } else {
 117                 wc->kqueue[head] = *entry;
 118         }
 119         wc->head = next;
 120
 121         if (cq->notify == IB_CQ_NEXT_COMP ||
 122             (cq->notify == IB_CQ_SOLICITED &&
 123              (solicited || entry->status != IB_WC_SUCCESS))) {
 124                 /*
 125                  * This will cause send_complete() to be called in
 126                  * another thread.
 127                  */
 128                 cq->notify = RVT_CQ_NONE;
 129                 cq->triggered++;
 130                 queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
 131                               &cq->comptask);
 132         }
 133
 134         spin_unlock_irqrestore(&cq->lock, flags);
 135 }
 136 EXPORT_SYMBOL(rvt_cq_enter);
 137
 138 static void send_complete(struct work_struct *work)
 139 {
 140         struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
 141
 142         /*
 143          * The completion handler will most likely rearm the notification
 144          * and poll for all pending entries.  If a new completion entry
 145          * is added while we are in this routine, queue_work()
 146          * won't call us again until we return so we check triggered to
 147          * see if we need to call the handler again.
 148          */
 149         for (;;) {
 150                 u8 triggered = cq->triggered;
 151
 152                 /*
 153                  * IPoIB connected mode assumes the callback is from a
 154                  * soft IRQ. We simulate this by blocking "bottom halves".
 155                  * See the implementation for ipoib_cm_handle_tx_wc(),
 156                  * netif_tx_lock_bh() and netif_tx_lock().
 157                  */
 158                 local_bh_disable();
 159                 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 160                 local_bh_enable();
 161
 162                 if (cq->triggered == triggered)
 163                         return;
 164         }
 165 }
 166
 167 /**
 168  * rvt_create_cq - create a completion queue
 169  * @ibdev: the device this completion queue is attached to
 170  * @attr: creation attributes
 171  * @context: unused by the QLogic_IB driver
 172  * @udata: user data for libibverbs.so
 173  *
 174  * Called by ib_create_cq() in the generic verbs code.
 175  *
 176  * Return: pointer to the completion queue or negative errno values
 177  * for failure.
 178  */
 179 struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
 180                             const struct ib_cq_init_attr *attr,
 181                             struct ib_ucontext *context,
 182                             struct ib_udata *udata)
 183 {
 184         struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
 185         struct rvt_cq *cq;
 186         struct rvt_cq_wc *wc;
 187         struct ib_cq *ret;
 188         u32 sz;
 189         unsigned int entries = attr->cqe;
 190         int comp_vector = attr->comp_vector;
 191
 192         if (attr->flags)
 193                 return ERR_PTR(-EINVAL);
 194
 195         if (entries < 1 || entries > rdi->dparms.props.max_cqe)
 196                 return ERR_PTR(-EINVAL);
 197
 198         if (comp_vector < 0)
 199                 comp_vector = 0;
 200
 201         comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
 202
 203         /* Allocate the completion queue structure. */
 204         cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
 205         if (!cq)
 206                 return ERR_PTR(-ENOMEM);
 207
 208         /*
 209          * Allocate the completion queue entries and head/tail pointers.
 210          * This is allocated separately so that it can be resized and
 211          * also mapped into user space.
 212          * We need to use vmalloc() in order to support mmap and large
 213          * numbers of entries.
 214          */
 215         sz = sizeof(*wc);
 216         if (udata && udata->outlen >= sizeof(__u64))
 217                 sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
 218         else
 219                 sz += sizeof(struct ib_wc) * (entries + 1);
 220         wc = udata ?
 221                 vmalloc_user(sz) :
 222                 vzalloc_node(sz, rdi->dparms.node);
 223         if (!wc) {
 224                 ret = ERR_PTR(-ENOMEM);
 225                 goto bail_cq;
 226         }
 227
 228         /*
 229          * Return the address of the WC as the offset to mmap.
 230          * See rvt_mmap() for details.
 231          */
 232         if (udata && udata->outlen >= sizeof(__u64)) {
 233                 int err;
 234
 235                 cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
 236                 if (!cq->ip) {
 237                         ret = ERR_PTR(-ENOMEM);
 238                         goto bail_wc;
 239                 }
 240
 241                 err = ib_copy_to_udata(udata, &cq->ip->offset,
 242                                        sizeof(cq->ip->offset));
 243                 if (err) {
 244                         ret = ERR_PTR(err);
 245                         goto bail_ip;
 246                 }
 247         }
 248
 249         spin_lock_irq(&rdi->n_cqs_lock);
 250         if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
 251                 spin_unlock_irq(&rdi->n_cqs_lock);
 252                 ret = ERR_PTR(-ENOMEM);
 253                 goto bail_ip;
 254         }
 255
 256         rdi->n_cqs_allocated++;
 257         spin_unlock_irq(&rdi->n_cqs_lock);
 258
 259         if (cq->ip) {
 260                 spin_lock_irq(&rdi->pending_lock);
 261                 list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
 262                 spin_unlock_irq(&rdi->pending_lock);
 263         }
 264
 265         /*
 266          * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 267          * The number of entries should be >= the number requested or return
 268          * an error.
 269          */
 270         cq->rdi = rdi;
 271         if (rdi->driver_f.comp_vect_cpu_lookup)
 272                 cq->comp_vector_cpu =
 273                         rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
 274         else
 275                 cq->comp_vector_cpu =
 276                         cpumask_first(cpumask_of_node(rdi->dparms.node));
 277
 278         cq->ibcq.cqe = entries;
 279         cq->notify = RVT_CQ_NONE;
 280         spin_lock_init(&cq->lock);
 281         INIT_WORK(&cq->comptask, send_complete);
 282         cq->queue = wc;
 283
 284         ret = &cq->ibcq;
 285
 286         trace_rvt_create_cq(cq, attr);
 287         goto done;
 288
 289 bail_ip:
 290         kfree(cq->ip);
 291 bail_wc:
 292         vfree(wc);
 293 bail_cq:
 294         kfree(cq);
 295 done:
 296         return ret;
 297 }
 298
 299 /**
 300  * rvt_destroy_cq - destroy a completion queue
 301  * @ibcq: the completion queue to destroy.
 302  *
 303  * Called by ib_destroy_cq() in the generic verbs code.
 304  *
 305  * Return: always 0
 306  */
 307 int rvt_destroy_cq(struct ib_cq *ibcq)
 308 {
 309         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 310         struct rvt_dev_info *rdi = cq->rdi;
 311
 312         flush_work(&cq->comptask);
 313         spin_lock_irq(&rdi->n_cqs_lock);
 314         rdi->n_cqs_allocated--;
 315         spin_unlock_irq(&rdi->n_cqs_lock);
 316         if (cq->ip)
 317                 kref_put(&cq->ip->ref, rvt_release_mmap_info);
 318         else
 319                 vfree(cq->queue);
 320         kfree(cq);
 321
 322         return 0;
 323 }
 324
 325 /**
 326  * rvt_req_notify_cq - change the notification type for a completion queue
 327  * @ibcq: the completion queue
 328  * @notify_flags: the type of notification to request
 329  *
 330  * This may be called from interrupt context.  Also called by
 331  * ib_req_notify_cq() in the generic verbs code.
 332  *
 333  * Return: 0 for success.
 334  */
 335 int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 336 {
 337         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 338         unsigned long flags;
 339         int ret = 0;
 340
 341         spin_lock_irqsave(&cq->lock, flags);
 342         /*
 343          * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
 344          * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 345          */
 346         if (cq->notify != IB_CQ_NEXT_COMP)
 347                 cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
 348
 349         if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
 350             cq->queue->head != cq->queue->tail)
 351                 ret = 1;
 352
 353         spin_unlock_irqrestore(&cq->lock, flags);
 354
 355         return ret;
 356 }
 357
 358 /**
 359  * rvt_resize_cq - change the size of the CQ
 360  * @ibcq: the completion queue
 361  *
 362  * Return: 0 for success.
 363  */
 364 int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 365 {
 366         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 367         struct rvt_cq_wc *old_wc;
 368         struct rvt_cq_wc *wc;
 369         u32 head, tail, n;
 370         int ret;
 371         u32 sz;
 372         struct rvt_dev_info *rdi = cq->rdi;
 373
 374         if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
 375                 return -EINVAL;
 376
 377         /*
 378          * Need to use vmalloc() if we want to support large #s of entries.
 379          */
 380         sz = sizeof(*wc);
 381         if (udata && udata->outlen >= sizeof(__u64))
 382                 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
 383         else
 384                 sz += sizeof(struct ib_wc) * (cqe + 1);
 385         wc = udata ?
 386                 vmalloc_user(sz) :
 387                 vzalloc_node(sz, rdi->dparms.node);
 388         if (!wc)
 389                 return -ENOMEM;
 390
 391         /* Check that we can write the offset to mmap. */
 392         if (udata && udata->outlen >= sizeof(__u64)) {
 393                 __u64 offset = 0;
 394
 395                 ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
 396                 if (ret)
 397                         goto bail_free;
 398         }
 399
 400         spin_lock_irq(&cq->lock);
 401         /*
 402          * Make sure head and tail are sane since they
 403          * might be user writable.
 404          */
 405         old_wc = cq->queue;
 406         head = old_wc->head;
 407         if (head > (u32)cq->ibcq.cqe)
 408                 head = (u32)cq->ibcq.cqe;
 409         tail = old_wc->tail;
 410         if (tail > (u32)cq->ibcq.cqe)
 411                 tail = (u32)cq->ibcq.cqe;
 412         if (head < tail)
 413                 n = cq->ibcq.cqe + 1 + head - tail;
 414         else
 415                 n = head - tail;
 416         if (unlikely((u32)cqe < n)) {
 417                 ret = -EINVAL;
 418                 goto bail_unlock;
 419         }
 420         for (n = 0; tail != head; n++) {
 421                 if (cq->ip)
 422                         wc->uqueue[n] = old_wc->uqueue[tail];
 423                 else
 424                         wc->kqueue[n] = old_wc->kqueue[tail];
 425                 if (tail == (u32)cq->ibcq.cqe)
 426                         tail = 0;
 427                 else
 428                         tail++;
 429         }
 430         cq->ibcq.cqe = cqe;
 431         wc->head = n;
 432         wc->tail = 0;
 433         cq->queue = wc;
 434         spin_unlock_irq(&cq->lock);
 435
 436         vfree(old_wc);
 437
 438         if (cq->ip) {
 439                 struct rvt_mmap_info *ip = cq->ip;
 440
 441                 rvt_update_mmap_info(rdi, ip, sz, wc);
 442
 443                 /*
 444                  * Return the offset to mmap.
 445                  * See rvt_mmap() for details.
 446                  */
 447                 if (udata && udata->outlen >= sizeof(__u64)) {
 448                         ret = ib_copy_to_udata(udata, &ip->offset,
 449                                                sizeof(ip->offset));
 450                         if (ret)
 451                                 return ret;
 452                 }
 453
 454                 spin_lock_irq(&rdi->pending_lock);
 455                 if (list_empty(&ip->pending_mmaps))
 456                         list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
 457                 spin_unlock_irq(&rdi->pending_lock);
 458         }
 459
 460         return 0;
 461
 462 bail_unlock:
 463         spin_unlock_irq(&cq->lock);
 464 bail_free:
 465         vfree(wc);
 466         return ret;
 467 }
 468
 469 /**
 470  * rvt_poll_cq - poll for work completion entries
 471  * @ibcq: the completion queue to poll
 472  * @num_entries: the maximum number of entries to return
 473  * @entry: pointer to array where work completions are placed
 474  *
 475  * This may be called from interrupt context.  Also called by ib_poll_cq()
 476  * in the generic verbs code.
 477  *
 478  * Return: the number of completion entries polled.
 479  */
 480 int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 481 {
 482         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 483         struct rvt_cq_wc *wc;
 484         unsigned long flags;
 485         int npolled;
 486         u32 tail;
 487
 488         /* The kernel can only poll a kernel completion queue */
 489         if (cq->ip)
 490                 return -EINVAL;
 491
 492         spin_lock_irqsave(&cq->lock, flags);
 493
 494         wc = cq->queue;
 495         tail = wc->tail;
 496         if (tail > (u32)cq->ibcq.cqe)
 497                 tail = (u32)cq->ibcq.cqe;
 498         for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
 499                 if (tail == wc->head)
 500                         break;
 501                 /* The kernel doesn't need a RMB since it has the lock. */
 502                 trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
 503                 *entry = wc->kqueue[tail];
 504                 if (tail >= cq->ibcq.cqe)
 505                         tail = 0;
 506                 else
 507                         tail++;
 508         }
 509         wc->tail = tail;
 510
 511         spin_unlock_irqrestore(&cq->lock, flags);
 512
 513         return npolled;
 514 }
 515
 516 /**
 517  * rvt_driver_cq_init - Init cq resources on behalf of driver
 518  * @rdi: rvt dev structure
 519  *
 520  * Return: 0 on success
 521  */
 522 int rvt_driver_cq_init(void)
 523 {
 524         comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
 525                                          0, "rdmavt_cq");
 526         if (!comp_vector_wq)
 527                 return -ENOMEM;
 528
 529         return 0;
 530 }
 531
 532 /**
 533  * rvt_cq_exit - tear down cq reources
 534  * @rdi: rvt dev structure
 535  */
 536 void rvt_cq_exit(void)
 537 {
 538         destroy_workqueue(comp_vector_wq);
 539         comp_vector_wq = NULL;
 540 }