drivers/infiniband/sw/rdmavt/cq.c

   1 /*
   2  * Copyright(c) 2016 Intel Corporation.
   3  *
   4  * This file is provided under a dual BSD/GPLv2 license.  When using or
   5  * redistributing this file, you may do so under either license.
   6  *
   7  * GPL LICENSE SUMMARY
   8  *
   9  * This program is free software; you can redistribute it and/or modify
  10  * it under the terms of version 2 of the GNU General Public License as
  11  * published by the Free Software Foundation.
  12  *
  13  * This program is distributed in the hope that it will be useful, but
  14  * WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * General Public License for more details.
  17  *
  18  * BSD LICENSE
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  *
  24  *  - Redistributions of source code must retain the above copyright
  25  *    notice, this list of conditions and the following disclaimer.
  26  *  - Redistributions in binary form must reproduce the above copyright
  27  *    notice, this list of conditions and the following disclaimer in
  28  *    the documentation and/or other materials provided with the
  29  *    distribution.
  30  *  - Neither the name of Intel Corporation nor the names of its
  31  *    contributors may be used to endorse or promote products derived
  32  *    from this software without specific prior written permission.
  33  *
  34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45  *
  46  */
  47
  48 #include <linux/slab.h>
  49 #include <linux/vmalloc.h>
  50 #include <linux/kthread.h>
  51 #include "cq.h"
  52 #include "vt.h"
  53
  54 /**
  55  * rvt_cq_enter - add a new entry to the completion queue
  56  * @cq: completion queue
  57  * @entry: work completion entry to add
  58  * @sig: true if @entry is solicited
  59  *
  60  * This may be called with qp->s_lock held.
  61  */
  62 void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
  63 {
  64         struct rvt_cq_wc *wc;
  65         unsigned long flags;
  66         u32 head;
  67         u32 next;
  68
  69         spin_lock_irqsave(&cq->lock, flags);
  70
  71         /*
  72          * Note that the head pointer might be writable by user processes.
  73          * Take care to verify it is a sane value.
  74          */
  75         wc = cq->queue;
  76         head = wc->head;
  77         if (head >= (unsigned)cq->ibcq.cqe) {
  78                 head = cq->ibcq.cqe;
  79                 next = 0;
  80         } else {
  81                 next = head + 1;
  82         }
  83
  84         if (unlikely(next == wc->tail)) {
  85                 spin_unlock_irqrestore(&cq->lock, flags);
  86                 if (cq->ibcq.event_handler) {
  87                         struct ib_event ev;
  88
  89                         ev.device = cq->ibcq.device;
  90                         ev.element.cq = &cq->ibcq;
  91                         ev.event = IB_EVENT_CQ_ERR;
  92                         cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
  93                 }
  94                 return;
  95         }
  96         if (cq->ip) {
  97                 wc->uqueue[head].wr_id = entry->wr_id;
  98                 wc->uqueue[head].status = entry->status;
  99                 wc->uqueue[head].opcode = entry->opcode;
 100                 wc->uqueue[head].vendor_err = entry->vendor_err;
 101                 wc->uqueue[head].byte_len = entry->byte_len;
 102                 wc->uqueue[head].ex.imm_data =
 103                         (__u32 __force)entry->ex.imm_data;
 104                 wc->uqueue[head].qp_num = entry->qp->qp_num;
 105                 wc->uqueue[head].src_qp = entry->src_qp;
 106                 wc->uqueue[head].wc_flags = entry->wc_flags;
 107                 wc->uqueue[head].pkey_index = entry->pkey_index;
 108                 wc->uqueue[head].slid = entry->slid;
 109                 wc->uqueue[head].sl = entry->sl;
 110                 wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
 111                 wc->uqueue[head].port_num = entry->port_num;
 112                 /* Make sure entry is written before the head index. */
 113                 smp_wmb();
 114         } else {
 115                 wc->kqueue[head] = *entry;
 116         }
 117         wc->head = next;
 118
 119         if (cq->notify == IB_CQ_NEXT_COMP ||
 120             (cq->notify == IB_CQ_SOLICITED &&
 121              (solicited || entry->status != IB_WC_SUCCESS))) {
 122                 struct kthread_worker *worker;
 123                 /*
 124                  * This will cause send_complete() to be called in
 125                  * another thread.
 126                  */
 127                 smp_read_barrier_depends(); /* see rvt_cq_exit */
 128                 worker = cq->rdi->worker;
 129                 if (likely(worker)) {
 130                         cq->notify = RVT_CQ_NONE;
 131                         cq->triggered++;
 132                         kthread_queue_work(worker, &cq->comptask);
 133                 }
 134         }
 135
 136         spin_unlock_irqrestore(&cq->lock, flags);
 137 }
 138 EXPORT_SYMBOL(rvt_cq_enter);
 139
 140 static void send_complete(struct kthread_work *work)
 141 {
 142         struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
 143
 144         /*
 145          * The completion handler will most likely rearm the notification
 146          * and poll for all pending entries.  If a new completion entry
 147          * is added while we are in this routine, queue_work()
 148          * won't call us again until we return so we check triggered to
 149          * see if we need to call the handler again.
 150          */
 151         for (;;) {
 152                 u8 triggered = cq->triggered;
 153
 154                 /*
 155                  * IPoIB connected mode assumes the callback is from a
 156                  * soft IRQ. We simulate this by blocking "bottom halves".
 157                  * See the implementation for ipoib_cm_handle_tx_wc(),
 158                  * netif_tx_lock_bh() and netif_tx_lock().
 159                  */
 160                 local_bh_disable();
 161                 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 162                 local_bh_enable();
 163
 164                 if (cq->triggered == triggered)
 165                         return;
 166         }
 167 }
 168
 169 /**
 170  * rvt_create_cq - create a completion queue
 171  * @ibdev: the device this completion queue is attached to
 172  * @attr: creation attributes
 173  * @context: unused by the QLogic_IB driver
 174  * @udata: user data for libibverbs.so
 175  *
 176  * Called by ib_create_cq() in the generic verbs code.
 177  *
 178  * Return: pointer to the completion queue or negative errno values
 179  * for failure.
 180  */
 181 struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
 182                             const struct ib_cq_init_attr *attr,
 183                             struct ib_ucontext *context,
 184                             struct ib_udata *udata)
 185 {
 186         struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
 187         struct rvt_cq *cq;
 188         struct rvt_cq_wc *wc;
 189         struct ib_cq *ret;
 190         u32 sz;
 191         unsigned int entries = attr->cqe;
 192
 193         if (attr->flags)
 194                 return ERR_PTR(-EINVAL);
 195
 196         if (entries < 1 || entries > rdi->dparms.props.max_cqe)
 197                 return ERR_PTR(-EINVAL);
 198
 199         /* Allocate the completion queue structure. */
 200         cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
 201         if (!cq)
 202                 return ERR_PTR(-ENOMEM);
 203
 204         /*
 205          * Allocate the completion queue entries and head/tail pointers.
 206          * This is allocated separately so that it can be resized and
 207          * also mapped into user space.
 208          * We need to use vmalloc() in order to support mmap and large
 209          * numbers of entries.
 210          */
 211         sz = sizeof(*wc);
 212         if (udata && udata->outlen >= sizeof(__u64))
 213                 sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
 214         else
 215                 sz += sizeof(struct ib_wc) * (entries + 1);
 216         wc = udata ?
 217                 vmalloc_user(sz) :
 218                 vzalloc_node(sz, rdi->dparms.node);
 219         if (!wc) {
 220                 ret = ERR_PTR(-ENOMEM);
 221                 goto bail_cq;
 222         }
 223
 224         /*
 225          * Return the address of the WC as the offset to mmap.
 226          * See rvt_mmap() for details.
 227          */
 228         if (udata && udata->outlen >= sizeof(__u64)) {
 229                 int err;
 230
 231                 cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
 232                 if (!cq->ip) {
 233                         ret = ERR_PTR(-ENOMEM);
 234                         goto bail_wc;
 235                 }
 236
 237                 err = ib_copy_to_udata(udata, &cq->ip->offset,
 238                                        sizeof(cq->ip->offset));
 239                 if (err) {
 240                         ret = ERR_PTR(err);
 241                         goto bail_ip;
 242                 }
 243         }
 244
 245         spin_lock(&rdi->n_cqs_lock);
 246         if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
 247                 spin_unlock(&rdi->n_cqs_lock);
 248                 ret = ERR_PTR(-ENOMEM);
 249                 goto bail_ip;
 250         }
 251
 252         rdi->n_cqs_allocated++;
 253         spin_unlock(&rdi->n_cqs_lock);
 254
 255         if (cq->ip) {
 256                 spin_lock_irq(&rdi->pending_lock);
 257                 list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
 258                 spin_unlock_irq(&rdi->pending_lock);
 259         }
 260
 261         /*
 262          * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 263          * The number of entries should be >= the number requested or return
 264          * an error.
 265          */
 266         cq->rdi = rdi;
 267         cq->ibcq.cqe = entries;
 268         cq->notify = RVT_CQ_NONE;
 269         spin_lock_init(&cq->lock);
 270         kthread_init_work(&cq->comptask, send_complete);
 271         cq->queue = wc;
 272
 273         ret = &cq->ibcq;
 274
 275         goto done;
 276
 277 bail_ip:
 278         kfree(cq->ip);
 279 bail_wc:
 280         vfree(wc);
 281 bail_cq:
 282         kfree(cq);
 283 done:
 284         return ret;
 285 }
 286
 287 /**
 288  * rvt_destroy_cq - destroy a completion queue
 289  * @ibcq: the completion queue to destroy.
 290  *
 291  * Called by ib_destroy_cq() in the generic verbs code.
 292  *
 293  * Return: always 0
 294  */
 295 int rvt_destroy_cq(struct ib_cq *ibcq)
 296 {
 297         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 298         struct rvt_dev_info *rdi = cq->rdi;
 299
 300         kthread_flush_work(&cq->comptask);
 301         spin_lock(&rdi->n_cqs_lock);
 302         rdi->n_cqs_allocated--;
 303         spin_unlock(&rdi->n_cqs_lock);
 304         if (cq->ip)
 305                 kref_put(&cq->ip->ref, rvt_release_mmap_info);
 306         else
 307                 vfree(cq->queue);
 308         kfree(cq);
 309
 310         return 0;
 311 }
 312
 313 /**
 314  * rvt_req_notify_cq - change the notification type for a completion queue
 315  * @ibcq: the completion queue
 316  * @notify_flags: the type of notification to request
 317  *
 318  * This may be called from interrupt context.  Also called by
 319  * ib_req_notify_cq() in the generic verbs code.
 320  *
 321  * Return: 0 for success.
 322  */
 323 int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 324 {
 325         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 326         unsigned long flags;
 327         int ret = 0;
 328
 329         spin_lock_irqsave(&cq->lock, flags);
 330         /*
 331          * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
 332          * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
 333          */
 334         if (cq->notify != IB_CQ_NEXT_COMP)
 335                 cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
 336
 337         if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
 338             cq->queue->head != cq->queue->tail)
 339                 ret = 1;
 340
 341         spin_unlock_irqrestore(&cq->lock, flags);
 342
 343         return ret;
 344 }
 345
 346 /**
 347  * rvt_resize_cq - change the size of the CQ
 348  * @ibcq: the completion queue
 349  *
 350  * Return: 0 for success.
 351  */
 352 int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 353 {
 354         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 355         struct rvt_cq_wc *old_wc;
 356         struct rvt_cq_wc *wc;
 357         u32 head, tail, n;
 358         int ret;
 359         u32 sz;
 360         struct rvt_dev_info *rdi = cq->rdi;
 361
 362         if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
 363                 return -EINVAL;
 364
 365         /*
 366          * Need to use vmalloc() if we want to support large #s of entries.
 367          */
 368         sz = sizeof(*wc);
 369         if (udata && udata->outlen >= sizeof(__u64))
 370                 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
 371         else
 372                 sz += sizeof(struct ib_wc) * (cqe + 1);
 373         wc = udata ?
 374                 vmalloc_user(sz) :
 375                 vzalloc_node(sz, rdi->dparms.node);
 376         if (!wc)
 377                 return -ENOMEM;
 378
 379         /* Check that we can write the offset to mmap. */
 380         if (udata && udata->outlen >= sizeof(__u64)) {
 381                 __u64 offset = 0;
 382
 383                 ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
 384                 if (ret)
 385                         goto bail_free;
 386         }
 387
 388         spin_lock_irq(&cq->lock);
 389         /*
 390          * Make sure head and tail are sane since they
 391          * might be user writable.
 392          */
 393         old_wc = cq->queue;
 394         head = old_wc->head;
 395         if (head > (u32)cq->ibcq.cqe)
 396                 head = (u32)cq->ibcq.cqe;
 397         tail = old_wc->tail;
 398         if (tail > (u32)cq->ibcq.cqe)
 399                 tail = (u32)cq->ibcq.cqe;
 400         if (head < tail)
 401                 n = cq->ibcq.cqe + 1 + head - tail;
 402         else
 403                 n = head - tail;
 404         if (unlikely((u32)cqe < n)) {
 405                 ret = -EINVAL;
 406                 goto bail_unlock;
 407         }
 408         for (n = 0; tail != head; n++) {
 409                 if (cq->ip)
 410                         wc->uqueue[n] = old_wc->uqueue[tail];
 411                 else
 412                         wc->kqueue[n] = old_wc->kqueue[tail];
 413                 if (tail == (u32)cq->ibcq.cqe)
 414                         tail = 0;
 415                 else
 416                         tail++;
 417         }
 418         cq->ibcq.cqe = cqe;
 419         wc->head = n;
 420         wc->tail = 0;
 421         cq->queue = wc;
 422         spin_unlock_irq(&cq->lock);
 423
 424         vfree(old_wc);
 425
 426         if (cq->ip) {
 427                 struct rvt_mmap_info *ip = cq->ip;
 428
 429                 rvt_update_mmap_info(rdi, ip, sz, wc);
 430
 431                 /*
 432                  * Return the offset to mmap.
 433                  * See rvt_mmap() for details.
 434                  */
 435                 if (udata && udata->outlen >= sizeof(__u64)) {
 436                         ret = ib_copy_to_udata(udata, &ip->offset,
 437                                                sizeof(ip->offset));
 438                         if (ret)
 439                                 return ret;
 440                 }
 441
 442                 spin_lock_irq(&rdi->pending_lock);
 443                 if (list_empty(&ip->pending_mmaps))
 444                         list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
 445                 spin_unlock_irq(&rdi->pending_lock);
 446         }
 447
 448         return 0;
 449
 450 bail_unlock:
 451         spin_unlock_irq(&cq->lock);
 452 bail_free:
 453         vfree(wc);
 454         return ret;
 455 }
 456
 457 /**
 458  * rvt_poll_cq - poll for work completion entries
 459  * @ibcq: the completion queue to poll
 460  * @num_entries: the maximum number of entries to return
 461  * @entry: pointer to array where work completions are placed
 462  *
 463  * This may be called from interrupt context.  Also called by ib_poll_cq()
 464  * in the generic verbs code.
 465  *
 466  * Return: the number of completion entries polled.
 467  */
 468 int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 469 {
 470         struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 471         struct rvt_cq_wc *wc;
 472         unsigned long flags;
 473         int npolled;
 474         u32 tail;
 475
 476         /* The kernel can only poll a kernel completion queue */
 477         if (cq->ip)
 478                 return -EINVAL;
 479
 480         spin_lock_irqsave(&cq->lock, flags);
 481
 482         wc = cq->queue;
 483         tail = wc->tail;
 484         if (tail > (u32)cq->ibcq.cqe)
 485                 tail = (u32)cq->ibcq.cqe;
 486         for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
 487                 if (tail == wc->head)
 488                         break;
 489                 /* The kernel doesn't need a RMB since it has the lock. */
 490                 *entry = wc->kqueue[tail];
 491                 if (tail >= cq->ibcq.cqe)
 492                         tail = 0;
 493                 else
 494                         tail++;
 495         }
 496         wc->tail = tail;
 497
 498         spin_unlock_irqrestore(&cq->lock, flags);
 499
 500         return npolled;
 501 }
 502
 503 /**
 504  * rvt_driver_cq_init - Init cq resources on behalf of driver
 505  * @rdi: rvt dev structure
 506  *
 507  * Return: 0 on success
 508  */
 509 int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 510 {
 511         int ret = 0;
 512         int cpu;
 513         struct task_struct *task;
 514
 515         if (rdi->worker)
 516                 return 0;
 517         spin_lock_init(&rdi->n_cqs_lock);
 518         rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
 519         if (!rdi->worker)
 520                 return -ENOMEM;
 521         kthread_init_worker(rdi->worker);
 522         task = kthread_create_on_node(
 523                 kthread_worker_fn,
 524                 rdi->worker,
 525                 rdi->dparms.node,
 526                 "%s", rdi->dparms.cq_name);
 527         if (IS_ERR(task)) {
 528                 kfree(rdi->worker);
 529                 rdi->worker = NULL;
 530                 return PTR_ERR(task);
 531         }
 532
 533         set_user_nice(task, MIN_NICE);
 534         cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
 535         kthread_bind(task, cpu);
 536         wake_up_process(task);
 537         return ret;
 538 }
 539
 540 /**
 541  * rvt_cq_exit - tear down cq reources
 542  * @rdi: rvt dev structure
 543  */
 544 void rvt_cq_exit(struct rvt_dev_info *rdi)
 545 {
 546         struct kthread_worker *worker;
 547
 548         worker = rdi->worker;
 549         if (!worker)
 550                 return;
 551         /* blocks future queuing from send_complete() */
 552         rdi->worker = NULL;
 553         smp_wmb(); /* See rdi_cq_enter */
 554         kthread_flush_worker(worker);
 555         kthread_stop(worker->task);
 556         kfree(worker);
 557 }