2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
52 #include <scsi/scsi_transport_srp.h>
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
/*
 * Module parameters and file-scope state.
 * NOTE(review): garbled extraction -- the module_param_cb() permission
 * arguments and parts of the srp_client initializer are missing lines.
 */
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static bool never_register;
74 static int topspin_workarounds = 1;
76 module_param(srp_sg_tablesize, uint, 0444);
77 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
79 module_param(cmd_sg_entries, uint, 0444);
80 MODULE_PARM_DESC(cmd_sg_entries,
81 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
83 module_param(indirect_sg_entries, uint, 0444);
84 MODULE_PARM_DESC(indirect_sg_entries,
85 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
87 module_param(allow_ext_sg, bool, 0444);
88 MODULE_PARM_DESC(allow_ext_sg,
89 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
91 module_param(topspin_workarounds, int, 0444);
92 MODULE_PARM_DESC(topspin_workarounds,
93 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
95 module_param(prefer_fr, bool, 0444);
96 MODULE_PARM_DESC(prefer_fr,
97 "Whether to use fast registration if both FMR and fast registration are supported");
99 module_param(register_always, bool, 0444);
100 MODULE_PARM_DESC(register_always,
101 "Use memory registration even for contiguous memory regions");
103 module_param(never_register, bool, 0444);
104 MODULE_PARM_DESC(never_register, "Never register memory");
/*
 * The three timeout parameters below share a validation callback
 * (srp_tmo_ops) because their values must stay mutually consistent.
 */
106 static const struct kernel_param_ops srp_tmo_ops;
108 static int srp_reconnect_delay = 10;
109 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
111 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
113 static int srp_fast_io_fail_tmo = 15;
114 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
116 MODULE_PARM_DESC(fast_io_fail_tmo,
117 "Number of seconds between the observation of a transport"
118 " layer error and failing all I/O. \"off\" means that this"
119 " functionality is disabled.");
121 static int srp_dev_loss_tmo = 600;
122 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
124 MODULE_PARM_DESC(dev_loss_tmo,
125 "Maximum number of seconds that the SRP transport should"
126 " insulate transport layer errors. After this time has been"
127 " exceeded the SCSI host is removed. Should be"
128 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
129 " if fast_io_fail_tmo has not been set. \"off\" means that"
130 " this functionality is disabled.");
132 static unsigned ch_count;
133 module_param(ch_count, uint, 0444);
134 MODULE_PARM_DESC(ch_count,
135 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
/* Forward declarations for the IB client callbacks and CM handler. */
137 static void srp_add_one(struct ib_device *device);
138 static void srp_remove_one(struct ib_device *device, void *client_data);
139 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
140 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
142 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
144 static struct scsi_transport_template *ib_srp_transport_template;
145 static struct workqueue_struct *srp_remove_wq;
147 static struct ib_client srp_client = {
150 .remove = srp_remove_one
153 static struct ib_sa_client srp_sa_client;
/*
 * srp_tmo_get() - "show" callback for the timeout module parameters.
 * Prints the numeric timeout value, or "off" when disabled.
 * NOTE(review): garbled extraction -- braces and the sign test selecting
 * between the two sprintf() calls are missing.
 */
155 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
157 int tmo = *(int *)kp->arg;
160 return sprintf(buffer, "%d", tmo);
162 return sprintf(buffer, "off");
/*
 * srp_tmo_set() - "store" callback for the timeout module parameters.
 * Parses the new value, then validates it against the other two timeouts
 * (reconnect_delay, fast_io_fail_tmo, dev_loss_tmo must stay consistent)
 * before committing it to *kp->arg.
 * NOTE(review): garbled extraction -- braces, local declarations and the
 * error-exit paths are missing.
 */
165 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
169 res = srp_parse_tmo(&tmo, val);
173 if (kp->arg == &srp_reconnect_delay)
174 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
176 else if (kp->arg == &srp_fast_io_fail_tmo)
177 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
179 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
183 *(int *)kp->arg = tmo;
189 static const struct kernel_param_ops srp_tmo_ops = {
194 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
196 return (struct srp_target_port *) host->hostdata;
199 static const char *srp_target_info(struct Scsi_Host *host)
201 return host_to_target(host)->target_name;
204 static int srp_target_is_topspin(struct srp_target_port *target)
206 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
207 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
209 return topspin_workarounds &&
210 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
211 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
/*
 * srp_alloc_iu() - allocate an information unit: the srp_iu struct, its
 * data buffer, and a DMA mapping of that buffer on the host's device.
 * NOTE(review): garbled extraction -- braces, NULL checks after the two
 * allocations and the error-unwind labels are missing.
 */
214 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
216 enum dma_data_direction direction)
220 iu = kmalloc(sizeof *iu, gfp_mask);
224 iu->buf = kzalloc(size, gfp_mask);
228 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
230 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
234 iu->direction = direction;
/*
 * srp_free_iu() - unmap and free an information unit allocated by
 * srp_alloc_iu(). (Unmap call truncated; kfree lines missing.)
 */
246 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
251 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
257 static void srp_qp_event(struct ib_event *event, void *context)
259 pr_debug("QP event %s (%d)\n",
260 ib_event_msg(event->event), event->event);
/*
 * srp_init_qp() - transition a freshly created QP to the INIT state with
 * remote read/write access on the host's port, after looking up the pkey
 * index for the target's pkey.
 * NOTE(review): garbled extraction -- braces, the qp parameter in the
 * prototype, allocation/error checks and the kfree/return tail are missing.
 */
263 static int srp_init_qp(struct srp_target_port *target,
266 struct ib_qp_attr *attr;
269 attr = kmalloc(sizeof *attr, GFP_KERNEL);
273 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
274 target->srp_host->port,
275 be16_to_cpu(target->pkey),
280 attr->qp_state = IB_QPS_INIT;
281 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
282 IB_ACCESS_REMOTE_WRITE);
283 attr->port_num = target->srp_host->port;
285 ret = ib_modify_qp(qp, attr,
/*
 * srp_new_cm_id() - allocate a fresh CM ID for a channel, destroying any
 * previous one, and (re)initialize the path record identity fields from
 * the target (sgid/dgid/pkey/service_id).
 * NOTE(review): garbled extraction -- braces and the trailing "return 0"
 * are missing; ib_create_cm_id()'s handler argument line was dropped.
 */
296 static int srp_new_cm_id(struct srp_rdma_ch *ch)
298 struct srp_target_port *target = ch->target;
299 struct ib_cm_id *new_cm_id;
301 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
303 if (IS_ERR(new_cm_id))
304 return PTR_ERR(new_cm_id);
307 ib_destroy_cm_id(ch->cm_id);
308 ch->cm_id = new_cm_id;
309 ch->path.sgid = target->sgid;
310 ch->path.dgid = target->orig_dgid;
311 ch->path.pkey = target->pkey;
312 ch->path.service_id = target->service_id;
/*
 * srp_alloc_fmr_pool() - create an FMR pool for this target, sized by
 * mr_pool_size with a 25% dirty watermark, pages of dev->mr_page_size,
 * and local-write/remote-read/remote-write access.
 * NOTE(review): garbled extraction -- braces are missing.
 */
317 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
319 struct srp_device *dev = target->srp_host->srp_dev;
320 struct ib_fmr_pool_param fmr_param;
322 memset(&fmr_param, 0, sizeof(fmr_param));
323 fmr_param.pool_size = target->mr_pool_size;
324 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
326 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
327 fmr_param.page_shift = ilog2(dev->mr_page_size);
328 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
329 IB_ACCESS_REMOTE_WRITE |
330 IB_ACCESS_REMOTE_READ);
332 return ib_create_fmr_pool(dev->pd, &fmr_param);
/* (kernel-doc header retained below; body truncated in extraction) */
336 * srp_destroy_fr_pool() - free the resources owned by a pool
337 * @pool: Fast registration pool to be destroyed.
339 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
342 struct srp_fr_desc *d;
/* Walk every descriptor; loop body (ib_dereg_mr + kfree(pool)) missing. */
347 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
/* (kernel-doc header retained below) */
355 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
356 * @device: IB device to allocate fast registration descriptors for.
357 * @pd: Protection domain associated with the FR descriptors.
358 * @pool_size: Number of descriptors to allocate.
359 * @max_page_list_len: Maximum fast registration work request page list length.
361 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362 struct ib_pd *pd, int pool_size,
363 int max_page_list_len)
365 struct srp_fr_pool *pool;
366 struct srp_fr_desc *d;
368 int i, ret = -EINVAL;
/* Pool struct and descriptor array are allocated in one chunk
 * (srp_fr_pool has a trailing flexible-style desc[] array). */
373 pool = kzalloc(sizeof(struct srp_fr_pool) +
374 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
377 pool->size = pool_size;
378 pool->max_page_list_len = max_page_list_len;
379 spin_lock_init(&pool->lock);
380 INIT_LIST_HEAD(&pool->free_list);
/* One MR per descriptor; each descriptor starts on the free list.
 * NOTE(review): the IS_ERR(mr) unwind path and the trailing "goto err"
 * / "return pool" lines are missing from this extraction. */
382 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
383 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
390 list_add_tail(&d->entry, &pool->free_list);
/* Error path: tear down any MRs already allocated. */
397 srp_destroy_fr_pool(pool);
/* (kernel-doc header retained below) */
405 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
406 * @pool: Pool to obtain descriptor from.
408 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
410 struct srp_fr_desc *d = NULL;
/* Pop the first free descriptor under the pool lock; returns NULL when
 * the pool is exhausted. NOTE(review): the list_del() inside the if and
 * the "return d" line are missing from this extraction. */
413 spin_lock_irqsave(&pool->lock, flags);
414 if (!list_empty(&pool->free_list)) {
415 d = list_first_entry(&pool->free_list, typeof(*d), entry);
418 spin_unlock_irqrestore(&pool->lock, flags);
424 * srp_fr_pool_put() - put an FR descriptor back in the free list
425 * @pool: Pool the descriptor was allocated from.
426 * @desc: Pointer to an array of fast registration descriptor pointers.
427 * @n: Number of descriptors to put back.
429 * Note: The caller must already have queued an invalidation request for
430 * desc->mr->rkey before calling this function.
432 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
438 spin_lock_irqsave(&pool->lock, flags);
439 for (i = 0; i < n; i++)
440 list_add(&desc[i]->entry, &pool->free_list);
441 spin_unlock_irqrestore(&pool->lock, flags);
444 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
446 struct srp_device *dev = target->srp_host->srp_dev;
448 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
449 dev->max_pages_per_mr);
/* (kernel-doc header retained below; body lines missing in extraction —
 * presumably ib_drain_rq() followed by ib_destroy_qp(); verify upstream.) */
453 * srp_destroy_qp() - destroy an RDMA queue pair
454 * @qp: RDMA queue pair.
456 * Drain the qp before destroying it. This avoids that the receive
457 * completion handler can access the queue pair while it is
460 static void srp_destroy_qp(struct ib_qp *qp)
/*
 * srp_create_ch_ib() - create the IB resources for one RDMA channel:
 * receive and send CQs, an RC QP, and (depending on device capabilities)
 * either a fast-registration or an FMR memory-registration pool. On
 * success the new objects replace and free any previous ones on the
 * channel. NOTE(review): garbled extraction -- braces, error-unwind
 * labels and several lines (ch->qp assignment, kfree(init_attr), return)
 * are missing.
 */
466 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
468 struct srp_target_port *target = ch->target;
469 struct srp_device *dev = target->srp_host->srp_dev;
470 struct ib_qp_init_attr *init_attr;
471 struct ib_cq *recv_cq, *send_cq;
473 struct ib_fmr_pool *fmr_pool = NULL;
474 struct srp_fr_pool *fr_pool = NULL;
/* m = send WRs per command: 1 for the command itself plus, with fast
 * registration, 2 extra WRs (REG_MR + LOCAL_INV) per MR per command. */
475 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
478 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
482 /* queue_size + 1 for ib_drain_rq() */
483 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
484 ch->comp_vector, IB_POLL_SOFTIRQ);
485 if (IS_ERR(recv_cq)) {
486 ret = PTR_ERR(recv_cq);
490 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
491 ch->comp_vector, IB_POLL_DIRECT);
492 if (IS_ERR(send_cq)) {
493 ret = PTR_ERR(send_cq);
497 init_attr->event_handler = srp_qp_event;
498 init_attr->cap.max_send_wr = m * target->queue_size;
499 init_attr->cap.max_recv_wr = target->queue_size + 1;
500 init_attr->cap.max_recv_sge = 1;
501 init_attr->cap.max_send_sge = 1;
502 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
503 init_attr->qp_type = IB_QPT_RC;
504 init_attr->send_cq = send_cq;
505 init_attr->recv_cq = recv_cq;
507 qp = ib_create_qp(dev->pd, init_attr);
513 ret = srp_init_qp(target, qp);
517 if (dev->use_fast_reg) {
518 fr_pool = srp_alloc_fr_pool(target);
519 if (IS_ERR(fr_pool)) {
520 ret = PTR_ERR(fr_pool);
521 shost_printk(KERN_WARNING, target->scsi_host, PFX
522 "FR pool allocation failed (%d)\n", ret);
525 } else if (dev->use_fmr) {
526 fmr_pool = srp_alloc_fmr_pool(target);
527 if (IS_ERR(fmr_pool)) {
528 ret = PTR_ERR(fmr_pool);
529 shost_printk(KERN_WARNING, target->scsi_host, PFX
530 "FMR pool allocation failed (%d)\n", ret);
/* Replace pre-existing channel resources with the new ones. */
536 srp_destroy_qp(ch->qp);
538 ib_free_cq(ch->recv_cq);
540 ib_free_cq(ch->send_cq);
543 ch->recv_cq = recv_cq;
544 ch->send_cq = send_cq;
546 if (dev->use_fast_reg) {
548 srp_destroy_fr_pool(ch->fr_pool);
549 ch->fr_pool = fr_pool;
550 } else if (dev->use_fmr) {
552 ib_destroy_fmr_pool(ch->fmr_pool);
553 ch->fmr_pool = fmr_pool;
/* (original header comment retained below) */
574 * Note: this function may be called without srp_alloc_iu_bufs() having been
575 * invoked. Hence the ch->[rt]x_ring checks.
/* Tear down all IB resources of one channel: CM ID, MR pool, QP, CQs,
 * and the rx/tx information-unit rings. NOTE(review): garbled
 * extraction -- braces, NULL checks and the kfree() of the rings are
 * missing. */
577 static void srp_free_ch_ib(struct srp_target_port *target,
578 struct srp_rdma_ch *ch)
580 struct srp_device *dev = target->srp_host->srp_dev;
587 ib_destroy_cm_id(ch->cm_id);
591 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
595 if (dev->use_fast_reg) {
597 srp_destroy_fr_pool(ch->fr_pool);
598 } else if (dev->use_fmr) {
600 ib_destroy_fmr_pool(ch->fmr_pool);
603 srp_destroy_qp(ch->qp);
604 ib_free_cq(ch->send_cq);
605 ib_free_cq(ch->recv_cq);
608 * Avoid that the SCSI error handler tries to use this channel after
609 * it has been freed. The SCSI error handler can namely continue
610 * trying to perform recovery actions after scsi_remove_host()
616 ch->send_cq = ch->recv_cq = NULL;
619 for (i = 0; i < target->queue_size; ++i)
620 srp_free_iu(target->srp_host, ch->rx_ring[i]);
625 for (i = 0; i < target->queue_size; ++i)
626 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * srp_path_rec_completion() - SA path record query callback; records the
 * status (and, in the missing lines, the returned path) on the channel
 * and completes ch->done. Logs on failure.
 */
632 static void srp_path_rec_completion(int status,
633 struct ib_sa_path_rec *pathrec,
636 struct srp_rdma_ch *ch = ch_ptr;
637 struct srp_target_port *target = ch->target;
641 shost_printk(KERN_ERR, target->scsi_host,
642 PFX "Got failed path rec status %d\n", status);
/*
 * srp_lookup_path() - issue an SA path record query for the channel and
 * wait (interruptibly) for srp_path_rec_completion() to fire. Holds a
 * reference on the SCSI host for the duration of the query.
 * NOTE(review): garbled extraction -- braces, the comp_mask PKEY flag
 * line, and several error-check lines are missing.
 */
648 static int srp_lookup_path(struct srp_rdma_ch *ch)
650 struct srp_target_port *target = ch->target;
653 ch->path.numb_path = 1;
655 init_completion(&ch->done);
658 * Avoid that the SCSI host can be removed by srp_remove_target()
659 * before srp_path_rec_completion() is called.
661 if (!scsi_host_get(target->scsi_host))
664 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
665 target->srp_host->srp_dev->dev,
666 target->srp_host->port,
668 IB_SA_PATH_REC_SERVICE_ID |
669 IB_SA_PATH_REC_DGID |
670 IB_SA_PATH_REC_SGID |
671 IB_SA_PATH_REC_NUMB_PATH |
673 SRP_PATH_REC_TIMEOUT_MS,
675 srp_path_rec_completion,
676 ch, &ch->path_query);
677 ret = ch->path_query_id;
681 ret = wait_for_completion_interruptible(&ch->done);
687 shost_printk(KERN_WARNING, target->scsi_host,
688 PFX "Path record query failed\n");
691 scsi_host_put(target->scsi_host);
/*
 * srp_send_req() - build and send the CM REQ carrying the SRP_LOGIN_REQ.
 * Fills in the CM connection parameters (path, QP, retry counts) and the
 * SRP login payload (IU length, buffer formats, single/multi-channel
 * flag, initiator/target port IDs), applying the port-ID byte-order
 * quirk for SRP rev-10 targets and the Topspin/Cisco zero-ID workaround.
 * NOTE(review): garbled extraction -- braces, the kfree(req)/return
 * tail, and a few lines (e.g. req->priv.tag assignment) are missing.
 */
697 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
699 struct srp_target_port *target = ch->target;
701 struct ib_cm_req_param param;
702 struct srp_login_req priv;
706 req = kzalloc(sizeof *req, GFP_KERNEL);
710 req->param.primary_path = &ch->path;
711 req->param.alternate_path = NULL;
712 req->param.service_id = target->service_id;
713 req->param.qp_num = ch->qp->qp_num;
714 req->param.qp_type = ch->qp->qp_type;
715 req->param.private_data = &req->priv;
716 req->param.private_data_len = sizeof req->priv;
717 req->param.flow_control = 1;
719 get_random_bytes(&req->param.starting_psn, 4);
720 req->param.starting_psn &= 0xffffff;
723 * Pick some arbitrary defaults here; we could make these
724 * module parameters if anyone cared about setting them.
726 req->param.responder_resources = 4;
727 req->param.remote_cm_response_timeout = 20;
728 req->param.local_cm_response_timeout = 20;
729 req->param.retry_count = target->tl_retry_count;
730 req->param.rnr_retry_count = 7;
731 req->param.max_cm_retries = 15;
733 req->priv.opcode = SRP_LOGIN_REQ;
735 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
736 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
737 SRP_BUF_FORMAT_INDIRECT);
738 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
739 SRP_MULTICHAN_SINGLE);
741 * In the published SRP specification (draft rev. 16a), the
742 * port identifier format is 8 bytes of ID extension followed
743 * by 8 bytes of GUID. Older drafts put the two halves in the
744 * opposite order, so that the GUID comes first.
746 * Targets conforming to these obsolete drafts can be
747 * recognized by the I/O Class they report.
749 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
750 memcpy(req->priv.initiator_port_id,
751 &target->sgid.global.interface_id, 8);
752 memcpy(req->priv.initiator_port_id + 8,
753 &target->initiator_ext, 8);
754 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
755 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
757 memcpy(req->priv.initiator_port_id,
758 &target->initiator_ext, 8);
759 memcpy(req->priv.initiator_port_id + 8,
760 &target->sgid.global.interface_id, 8);
761 memcpy(req->priv.target_port_id, &target->id_ext, 8);
762 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
766 * Topspin/Cisco SRP targets will reject our login unless we
767 * zero out the first 8 bytes of our initiator port ID and set
768 * the second 8 bytes to the local node GUID.
770 if (srp_target_is_topspin(target)) {
771 shost_printk(KERN_DEBUG, target->scsi_host,
772 PFX "Topspin/Cisco initiator port ID workaround "
773 "activated for target GUID %016llx\n",
774 be64_to_cpu(target->ioc_guid));
775 memset(req->priv.initiator_port_id, 0, 8);
776 memcpy(req->priv.initiator_port_id + 8,
777 &target->srp_host->srp_dev->dev->node_guid, 8);
780 status = ib_send_cm_req(ch->cm_id, &req->param);
/*
 * srp_queue_remove_work() - atomically transition the target to
 * SRP_TARGET_REMOVED and, if this call made the transition, queue the
 * removal work. Returns whether the state changed.
 * NOTE(review): garbled extraction -- braces, "changed = true" inside
 * the if, the "if (changed)" guard and the return are missing.
 */
787 static bool srp_queue_remove_work(struct srp_target_port *target)
789 bool changed = false;
791 spin_lock_irq(&target->lock);
792 if (target->state != SRP_TARGET_REMOVED) {
793 target->state = SRP_TARGET_REMOVED;
796 spin_unlock_irq(&target->lock);
799 queue_work(srp_remove_wq, &target->remove_work);
/*
 * srp_disconnect_target() - mark every channel disconnected and send a
 * CM DREQ on each; failures to send the DREQ are only logged.
 * NOTE(review): garbled extraction -- braces and the "ch = &target->ch[i]"
 * line inside the loop are missing.
 */
804 static void srp_disconnect_target(struct srp_target_port *target)
806 struct srp_rdma_ch *ch;
809 /* XXX should send SRP_I_LOGOUT request */
811 for (i = 0; i < target->ch_count; i++) {
813 ch->connected = false;
814 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
815 shost_printk(KERN_DEBUG, target->scsi_host,
816 PFX "Sending CM DREQ failed\n");
/*
 * srp_free_req_data() - free the per-request resources of one channel:
 * MR lists (FR or FMR + map_page), the DMA-mapped indirect descriptor
 * buffer, and (in missing lines) the req_ring itself.
 * NOTE(review): garbled extraction -- braces, the fast-reg branch body
 * (kfree(req->fr_list)) and the kfree(ch->req_ring) tail are missing.
 */
821 static void srp_free_req_data(struct srp_target_port *target,
822 struct srp_rdma_ch *ch)
824 struct srp_device *dev = target->srp_host->srp_dev;
825 struct ib_device *ibdev = dev->dev;
826 struct srp_request *req;
832 for (i = 0; i < target->req_ring_size; ++i) {
833 req = &ch->req_ring[i];
834 if (dev->use_fast_reg) {
837 kfree(req->fmr_list);
838 kfree(req->map_page);
840 if (req->indirect_dma_addr) {
841 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
842 target->indirect_size,
845 kfree(req->indirect_desc);
/*
 * srp_alloc_req_data() - allocate the channel's request ring and, per
 * request, the MR list (FR or FMR), the FMR map_page array, and a
 * DMA-mapped indirect descriptor buffer.
 * NOTE(review): garbled extraction -- braces, allocation NULL checks and
 * the "ret = 0 / out: return ret" tail are missing.
 */
852 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
854 struct srp_target_port *target = ch->target;
855 struct srp_device *srp_dev = target->srp_host->srp_dev;
856 struct ib_device *ibdev = srp_dev->dev;
857 struct srp_request *req;
860 int i, ret = -ENOMEM;
862 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
867 for (i = 0; i < target->req_ring_size; ++i) {
868 req = &ch->req_ring[i];
869 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
873 if (srp_dev->use_fast_reg) {
874 req->fr_list = mr_list;
876 req->fmr_list = mr_list;
877 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
878 sizeof(void *), GFP_KERNEL);
882 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
883 if (!req->indirect_desc)
886 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
887 target->indirect_size,
889 if (ib_dma_mapping_error(ibdev, dma_addr))
892 req->indirect_dma_addr = dma_addr;
/* (kernel-doc header retained below) */
901 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
902 * @shost: SCSI host whose attributes to remove from sysfs.
904 * Note: Any attributes defined in the host template and that did not exist
905 * before invocation of this function will be ignored.
907 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
909 struct device_attribute **attr;
/* Iterate the NULL-terminated attribute array (braces lost in extraction). */
911 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
912 device_remove_file(&shost->shost_dev, *attr);
/*
 * srp_remove_target() - full teardown of a target port. Order matters:
 * sysfs attrs first, then the SCSI host and rport, then per-channel IB
 * resources and request data, finally unlink from the host's target list
 * and drop the last SCSI host reference.
 * NOTE(review): garbled extraction -- braces and the "ch = &target->ch[i]"
 * lines inside the two loops, plus the kfree(target->ch) lines, are
 * missing.
 */
915 static void srp_remove_target(struct srp_target_port *target)
917 struct srp_rdma_ch *ch;
920 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
922 srp_del_scsi_host_attr(target->scsi_host);
923 srp_rport_get(target->rport);
924 srp_remove_host(target->scsi_host);
925 scsi_remove_host(target->scsi_host);
926 srp_stop_rport_timers(target->rport);
927 srp_disconnect_target(target);
928 for (i = 0; i < target->ch_count; i++) {
930 srp_free_ch_ib(target, ch);
932 cancel_work_sync(&target->tl_err_work);
933 srp_rport_put(target->rport);
934 for (i = 0; i < target->ch_count; i++) {
936 srp_free_req_data(target, ch);
941 spin_lock(&target->srp_host->target_lock);
942 list_del(&target->list);
943 spin_unlock(&target->srp_host->target_lock);
945 scsi_host_put(target->scsi_host);
/*
 * srp_remove_work() - workqueue handler queued by srp_queue_remove_work();
 * performs the actual target removal. (Braces lost in extraction.)
 */
948 static void srp_remove_work(struct work_struct *work)
950 struct srp_target_port *target =
951 container_of(work, struct srp_target_port, remove_work);
953 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
955 srp_remove_target(target);
958 static void srp_rport_delete(struct srp_rport *rport)
960 struct srp_target_port *target = rport->lld_data;
962 srp_queue_remove_work(target);
966 * srp_connected_ch() - number of connected channels
967 * @target: SRP target port.
969 static int srp_connected_ch(struct srp_target_port *target)
973 for (i = 0; i < target->ch_count; i++)
974 c += target->ch[i].connected;
/*
 * srp_connect_ch() - connect one RDMA channel: resolve the path, send the
 * login REQ, then loop handling the CM handshake outcome. Redirect
 * statuses retry (port redirect re-resolves the path); a stale
 * connection aborts.
 * NOTE(review): garbled extraction -- braces, the "while (1)" / switch
 * skeleton, several case bodies and error checks are missing.
 */
979 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
981 struct srp_target_port *target = ch->target;
984 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
986 ret = srp_lookup_path(ch);
991 init_completion(&ch->done);
992 ret = srp_send_req(ch, multich);
995 ret = wait_for_completion_interruptible(&ch->done);
1000 * The CM event handling code will set status to
1001 * SRP_PORT_REDIRECT if we get a port redirect REJ
1002 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1003 * redirect REJ back.
1008 ch->connected = true;
1011 case SRP_PORT_REDIRECT:
1012 ret = srp_lookup_path(ch);
1017 case SRP_DLID_REDIRECT:
1020 case SRP_STALE_CONN:
1021 shost_printk(KERN_ERR, target->scsi_host, PFX
1022 "giving up on stale connection\n");
/* Map a positive CM status to -ENODEV for the caller. */
1032 return ret <= 0 ? ret : -ENODEV;
/*
 * srp_inv_rkey_err_done() - completion callback for a failed local-
 * invalidate work request; delegates to the common QP error handler.
 * (Reconstructed: braces were lost in extraction.)
 */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}
/*
 * srp_inv_rkey() - post an IB_WR_LOCAL_INV work request to invalidate
 * @rkey on the channel's QP; completion errors are routed through
 * req->reg_cqe to srp_inv_rkey_err_done().
 * NOTE(review): garbled extraction -- the rkey parameter line, braces and
 * parts of the wr initializer (next/num_sge/send_flags) are missing.
 */
1040 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1043 struct ib_send_wr *bad_wr;
1044 struct ib_send_wr wr = {
1045 .opcode = IB_WR_LOCAL_INV,
1049 .ex.invalidate_rkey = rkey,
1052 wr.wr_cqe = &req->reg_cqe;
1053 req->reg_cqe.done = srp_inv_rkey_err_done;
1054 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * srp_unmap_data() - undo the memory registrations and DMA mapping done
 * for one SCSI command: queue LOCAL_INV WRs and return FR descriptors to
 * the pool (fast-reg path), or unmap pooled FMRs (FMR path), then unmap
 * the scatterlist. A failed INV post escalates via tl_err_work.
 * NOTE(review): garbled extraction -- braces and a few lines (early
 * return, req->nmdesc argument to srp_fr_pool_put) are missing.
 */
1057 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1058 struct srp_rdma_ch *ch,
1059 struct srp_request *req)
1061 struct srp_target_port *target = ch->target;
1062 struct srp_device *dev = target->srp_host->srp_dev;
1063 struct ib_device *ibdev = dev->dev;
1066 if (!scsi_sglist(scmnd) ||
1067 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1068 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1071 if (dev->use_fast_reg) {
1072 struct srp_fr_desc **pfr;
1074 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1075 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1077 shost_printk(KERN_ERR, target->scsi_host, PFX
1078 "Queueing INV WR for rkey %#x failed (%d)\n",
1079 (*pfr)->mr->rkey, res);
1080 queue_work(system_long_wq,
1081 &target->tl_err_work);
1085 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1087 } else if (dev->use_fmr) {
1088 struct ib_pool_fmr **pfmr;
1090 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1091 ib_fmr_pool_unmap(*pfmr);
1094 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1095 scmnd->sc_data_direction);
/* (kernel-doc header retained below) */
1099 * srp_claim_req - Take ownership of the scmnd associated with a request.
1100 * @ch: SRP RDMA channel.
1101 * @req: SRP request.
1102 * @sdev: If not NULL, only take ownership for this SCSI device.
1103 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1104 * ownership of @req->scmnd if it equals @scmnd.
1107 * Either NULL or a pointer to the SCSI command the caller became owner of.
1109 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1110 struct srp_request *req,
1111 struct scsi_device *sdev,
1112 struct scsi_cmnd *scmnd)
1114 unsigned long flags;
/* Claim under the channel lock.
 * NOTE(review): garbled extraction -- the "if (req->scmnd" condition
 * opening, the body clearing req->scmnd, the else branch and the final
 * return are missing. */
1116 spin_lock_irqsave(&ch->lock, flags);
1118 (!sdev || req->scmnd->device == sdev) &&
1119 (!scmnd || req->scmnd == scmnd)) {
1125 spin_unlock_irqrestore(&ch->lock, flags);
/* (kernel-doc header retained below) */
1131 * srp_free_req() - Unmap data and adjust ch->req_lim.
1132 * @ch: SRP RDMA channel.
1133 * @req: Request to be freed.
1134 * @scmnd: SCSI command associated with @req.
1135 * @req_lim_delta: Amount to be added to @target->req_lim.
1137 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1138 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1140 unsigned long flags;
/* Unmap outside the lock; only the req_lim update is locked. */
1142 srp_unmap_data(scmnd, ch, req);
1144 spin_lock_irqsave(&ch->lock, flags);
1145 ch->req_lim += req_lim_delta;
1146 spin_unlock_irqrestore(&ch->lock, flags);
/*
 * srp_finish_req() - claim the SCSI command of @req (if any, optionally
 * restricted to @sdev), free the request, and complete the command with
 * @result. NOTE(review): braces and the "if (scmnd)" guard are missing
 * in this extraction.
 */
1149 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1150 struct scsi_device *sdev, int result)
1152 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1155 srp_free_req(ch, req, scmnd, 0);
1156 scmnd->result = result;
1157 scmnd->scsi_done(scmnd);
/*
 * srp_terminate_io() - SRP transport callback that fails all outstanding
 * requests on every channel with DID_TRANSPORT_FAILFAST.
 * NOTE(review): braces are missing in this extraction.
 */
1161 static void srp_terminate_io(struct srp_rport *rport)
1163 struct srp_target_port *target = rport->lld_data;
1164 struct srp_rdma_ch *ch;
1165 struct Scsi_Host *shost = target->scsi_host;
1166 struct scsi_device *sdev;
1170 * Invoking srp_terminate_io() while srp_queuecommand() is running
1171 * is not safe. Hence the warning statement below.
1173 shost_for_each_device(sdev, shost)
1174 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1176 for (i = 0; i < target->ch_count; i++) {
1177 ch = &target->ch[i];
1179 for (j = 0; j < target->req_ring_size; ++j) {
1180 struct srp_request *req = &ch->req_ring[j];
1182 srp_finish_req(ch, req, NULL,
1183 DID_TRANSPORT_FAILFAST << 16);
/* (original header comment retained below) */
1189 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1190 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1191 * srp_reset_device() or srp_reset_host() calls will occur while this function
1192 * is in progress. One way to realize that is not to call this function
1193 * directly but to call srp_reconnect_rport() instead since that last function
1194 * serializes calls of this function via rport->mutex and also blocks
1195 * srp_queuecommand() calls before invoking this function.
/* Reconnect sequence: disconnect, new CM IDs, fail in-flight requests
 * with DID_RESET, recreate the per-channel IB resources and tx free
 * lists, then reconnect each channel.
 * NOTE(review): garbled extraction -- braces, "int i, j, ret = 0",
 * "multich = true" after the first successful connect, and the return
 * are missing. */
1197 static int srp_rport_reconnect(struct srp_rport *rport)
1199 struct srp_target_port *target = rport->lld_data;
1200 struct srp_rdma_ch *ch;
1202 bool multich = false;
1204 srp_disconnect_target(target);
1206 if (target->state == SRP_TARGET_SCANNING)
1210 * Now get a new local CM ID so that we avoid confusing the target in
1211 * case things are really fouled up. Doing so also ensures that all CM
1212 * callbacks will have finished before a new QP is allocated.
1214 for (i = 0; i < target->ch_count; i++) {
1215 ch = &target->ch[i];
1216 ret += srp_new_cm_id(ch);
1218 for (i = 0; i < target->ch_count; i++) {
1219 ch = &target->ch[i];
1220 for (j = 0; j < target->req_ring_size; ++j) {
1221 struct srp_request *req = &ch->req_ring[j];
1223 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1226 for (i = 0; i < target->ch_count; i++) {
1227 ch = &target->ch[i];
1229 * Whether or not creating a new CM ID succeeded, create a new
1230 * QP. This guarantees that all completion callback function
1231 * invocations have finished before request resetting starts.
1233 ret += srp_create_ch_ib(ch);
1235 INIT_LIST_HEAD(&ch->free_tx);
1236 for (j = 0; j < target->queue_size; ++j)
1237 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1240 target->qp_in_error = false;
1242 for (i = 0; i < target->ch_count; i++) {
1243 ch = &target->ch[i];
1246 ret = srp_connect_ch(ch, multich);
1251 shost_printk(KERN_INFO, target->scsi_host,
1252 PFX "reconnect succeeded\n");
1257 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1258 unsigned int dma_len, u32 rkey)
1260 struct srp_direct_buf *desc = state->desc;
1262 WARN_ON_ONCE(!dma_len);
1264 desc->va = cpu_to_be64(dma_addr);
1265 desc->key = cpu_to_be32(rkey);
1266 desc->len = cpu_to_be32(dma_len);
1268 state->total_len += dma_len;
/*
 * srp_map_finish_fmr() - close out the pages accumulated in @state as one
 * FMR mapping (or a plain descriptor using the unsafe global rkey when a
 * single page suffices and the PD allows it), then emit the descriptor.
 * NOTE(review): garbled extraction -- braces, the -ENOMEM early return,
 * the io_addr local, IS_ERR(fmr) check, and the state reset/return tail
 * are missing.
 */
1273 static int srp_map_finish_fmr(struct srp_map_state *state,
1274 struct srp_rdma_ch *ch)
1276 struct srp_target_port *target = ch->target;
1277 struct srp_device *dev = target->srp_host->srp_dev;
1278 struct ib_pd *pd = target->pd;
1279 struct ib_pool_fmr *fmr;
1282 if (state->fmr.next >= state->fmr.end)
1285 WARN_ON_ONCE(!dev->use_fmr);
1287 if (state->npages == 0)
1290 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1291 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1292 pd->unsafe_global_rkey);
1296 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1297 state->npages, io_addr);
1299 return PTR_ERR(fmr);
1301 *state->fmr.next++ = fmr;
1304 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1305 state->dma_len, fmr->fmr->rkey);
/*
 * srp_reg_mr_err_done() - completion callback for a failed fast-
 * registration work request; delegates to the common QP error handler.
 * (Reconstructed: braces were lost in extraction.)
 */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
/* (original header comment retained below) */
1320 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1321 * where to start in the first element. If sg_offset_p != NULL then
1322 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1323 * byte that has not yet been mapped.
/* Fast-registration counterpart of srp_map_finish_fmr(): take a pooled
 * FR descriptor, bump its rkey, map the scatterlist into the MR and post
 * an IB_WR_REG_MR; single-SGE mappings may instead use the PD's unsafe
 * global rkey.
 * NOTE(review): garbled extraction -- braces, the -ENOMEM returns, parts
 * of the reg_wr initializer (wr.mr, wr.wr.next, wr.wr.num_sge) and the
 * "return n" tail are missing. */
1325 static int srp_map_finish_fr(struct srp_map_state *state,
1326 struct srp_request *req,
1327 struct srp_rdma_ch *ch, int sg_nents,
1328 unsigned int *sg_offset_p)
1330 struct srp_target_port *target = ch->target;
1331 struct srp_device *dev = target->srp_host->srp_dev;
1332 struct ib_pd *pd = target->pd;
1333 struct ib_send_wr *bad_wr;
1334 struct ib_reg_wr wr;
1335 struct srp_fr_desc *desc;
1339 if (state->fr.next >= state->fr.end)
1342 WARN_ON_ONCE(!dev->use_fast_reg);
1344 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1345 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1347 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1348 sg_dma_len(state->sg) - sg_offset,
1349 pd->unsafe_global_rkey);
1355 desc = srp_fr_pool_get(ch->fr_pool);
/* Invalidate-then-register: a new rkey prevents stale remote access. */
1359 rkey = ib_inc_rkey(desc->mr->rkey);
1360 ib_update_fast_reg_key(desc->mr, rkey);
1362 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1364 if (unlikely(n < 0)) {
1365 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1366 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1367 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1368 sg_offset_p ? *sg_offset_p : -1, n);
1372 WARN_ON_ONCE(desc->mr->length == 0);
1374 req->reg_cqe.done = srp_reg_mr_err_done;
1377 wr.wr.opcode = IB_WR_REG_MR;
1378 wr.wr.wr_cqe = &req->reg_cqe;
1380 wr.wr.send_flags = 0;
1382 wr.key = desc->mr->rkey;
1383 wr.access = (IB_ACCESS_LOCAL_WRITE |
1384 IB_ACCESS_REMOTE_READ |
1385 IB_ACCESS_REMOTE_WRITE);
1387 *state->fr.next++ = desc;
1390 srp_map_desc(state, desc->mr->iova,
1391 desc->mr->length, desc->mr->rkey);
1393 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1394 if (unlikely(err)) {
1395 WARN_ON_ONCE(err == -ENOMEM);
/*
 * Add one scatterlist entry to the FMR mapping state, splitting it into
 * MR-page-sized chunks and closing out the current FMR whenever the page
 * list is full or a non-zero in-page offset would prevent merging.
 *
 * NOTE(review): the loop header iterating over dma_len and the early-return
 * error checks are elided from this excerpt.
 */
1402 static int srp_map_sg_entry(struct srp_map_state *state,
1403 struct srp_rdma_ch *ch,
1404 struct scatterlist *sg)
1406 struct srp_target_port *target = ch->target;
1407 struct srp_device *dev = target->srp_host->srp_dev;
1408 struct ib_device *ibdev = dev->dev;
1409 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1410 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1411 unsigned int len = 0;
1414 WARN_ON_ONCE(!dma_len);
/* Offset of this chunk within an MR page. */
1417 unsigned offset = dma_addr & ~dev->mr_page_mask;
/* Close the current FMR if it is full, or if this chunk does not
 * start on a page boundary (merging is only possible at page
 * boundaries). */
1419 if (state->npages == dev->max_pages_per_mr ||
1420 (state->npages > 0 && offset != 0)) {
1421 ret = srp_map_finish_fmr(state, ch);
1426 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1429 state->base_dma_addr = dma_addr;
1430 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1431 state->dma_len += len;
1437 * If the end of the MR is not on a page boundary then we need to
1438 * close it out and start a new one -- we can only merge at page
1442 if ((dma_addr & ~dev->mr_page_mask) != 0)
1443 ret = srp_map_finish_fmr(state, ch);
/*
 * Map a whole scatterlist using FMRs: walk the entries through
 * srp_map_sg_entry() and flush any partially filled FMR at the end.
 */
1447 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1448 struct srp_request *req, struct scatterlist *scat,
1451 struct scatterlist *sg;
1454 state->pages = req->map_page;
/* Bound the per-request FMR list by mr_per_cmd. */
1455 state->fmr.next = req->fmr_list;
1456 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1458 for_each_sg(scat, sg, count, i) {
1459 ret = srp_map_sg_entry(state, ch, sg);
/* Register whatever pages are still pending. */
1464 ret = srp_map_finish_fmr(state, ch);
/*
 * Map a whole scatterlist using fast registration: repeatedly call
 * srp_map_finish_fr(), which maps as many entries as fit in one MR, and
 * advance state->sg past the entries that were consumed.
 *
 * NOTE(review): the surrounding loop header and termination condition are
 * elided from this excerpt.
 */
1471 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1472 struct srp_request *req, struct scatterlist *scat,
1475 unsigned int sg_offset = 0;
/* Bound the per-request fast-reg descriptor list by mr_per_cmd. */
1477 state->fr.next = req->fr_list;
1478 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1487 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1488 if (unlikely(n < 0))
/* Skip the n scatterlist entries consumed by this registration. */
1492 for (i = 0; i < n; i++)
1493 state->sg = sg_next(state->sg);
/*
 * No-registration path: emit one direct descriptor per scatterlist entry,
 * all referencing the PD's unsafe global rkey.  Used when neither FMR nor
 * fast registration is in effect.
 */
1499 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1500 struct srp_request *req, struct scatterlist *scat,
1503 struct srp_target_port *target = ch->target;
1504 struct srp_device *dev = target->srp_host->srp_dev;
1505 struct scatterlist *sg;
1508 for_each_sg(scat, sg, count, i) {
1509 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1510 ib_sg_dma_len(dev->dev, sg),
1511 target->pd->unsafe_global_rkey);
1518 * Register the indirect data buffer descriptor with the HCA.
1520 * Note: since the indirect data buffer descriptor has been allocated with
1521 * kmalloc() it is guaranteed that this buffer is a physically contiguous
/*
 * Build a one-entry mapping state for the kmalloc'ed indirect descriptor
 * table and register it via fast-reg or FMR, returning the resulting rkey
 * through @idb_rkey.
 *
 * NOTE(review): the idb_pages declaration, error returns and the final
 * "return 0" are elided from this excerpt.
 */
1524 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1525 void **next_mr, void **end_mr, u32 idb_len,
1528 struct srp_target_port *target = ch->target;
1529 struct srp_device *dev = target->srp_host->srp_dev;
1530 struct srp_map_state state;
1531 struct srp_direct_buf idb_desc;
1533 struct scatterlist idb_sg[1];
1536 memset(&state, 0, sizeof(state));
1537 memset(&idb_desc, 0, sizeof(idb_desc));
1538 state.gen.next = next_mr;
1539 state.gen.end = end_mr;
/* The single descriptor produced by the mapping lands in idb_desc. */
1540 state.desc = &idb_desc;
1541 state.base_dma_addr = req->indirect_dma_addr;
1542 state.dma_len = idb_len;
1544 if (dev->use_fast_reg) {
1546 sg_init_one(idb_sg, req->indirect_desc, idb_len);
/* Pre-mapped buffer: patch the DMA address in by hand. */
1547 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1548 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1549 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1551 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1554 WARN_ON_ONCE(ret < 1);
1555 } else if (dev->use_fmr) {
1556 state.pages = idb_pages;
1557 state.pages[0] = (req->indirect_dma_addr &
1560 ret = srp_map_finish_fmr(&state, ch);
/* idb_desc.key was filled in by srp_map_desc() above. */
1567 *idb_rkey = idb_desc.key;
1572 #if defined(DYNAMIC_DATA_DEBUG)
/*
 * Debug-only consistency check: verify that the sum of the descriptor
 * lengths equals the SCSI buffer length and that the total registered MR
 * length does not exceed it; log an error on any mismatch.
 */
1573 static void srp_check_mapping(struct srp_map_state *state,
1574 struct srp_rdma_ch *ch, struct srp_request *req,
1575 struct scatterlist *scat, int count)
1577 struct srp_device *dev = ch->target->srp_host->srp_dev;
1578 struct srp_fr_desc **pfr;
1579 u64 desc_len = 0, mr_len = 0;
1582 for (i = 0; i < state->ndesc; i++)
1583 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1584 if (dev->use_fast_reg)
1585 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1586 mr_len += (*pfr)->mr->length;
1587 else if (dev->use_fmr)
1588 for (i = 0; i < state->nmdesc; i++)
1589 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1590 if (desc_len != scsi_bufflen(req->scmnd) ||
1591 mr_len > scsi_bufflen(req->scmnd))
1592 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1593 scsi_bufflen(req->scmnd), desc_len, mr_len,
1594 state->ndesc, state->nmdesc);
1599 * srp_map_data() - map SCSI data buffer onto an SRP request
1600 * @scmnd: SCSI command to map
1601 * @ch: SRP RDMA channel
1604 * Returns the length in bytes of the SRP_CMD IU or a negative value if
/*
 * NOTE(review): this excerpt elides a number of lines (the fmt/len/idb_rkey
 * declarations, some gotos and labels, and parts of the error path); the
 * visible logic is: DMA-map the sg list, pick direct vs indirect
 * descriptor format, register memory via FR/FMR/none, and fill in the
 * SRP_CMD IU accordingly.
 */
1607 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1608 struct srp_request *req)
1610 struct srp_target_port *target = ch->target;
1611 struct ib_pd *pd = target->pd;
1612 struct scatterlist *scat;
1613 struct srp_cmd *cmd = req->cmd->buf;
1614 int len, nents, count, ret;
1615 struct srp_device *dev;
1616 struct ib_device *ibdev;
1617 struct srp_map_state state;
1618 struct srp_indirect_buf *indirect_hdr;
1619 u32 idb_len, table_len;
/* Commands without a data phase need only the bare SRP_CMD IU. */
1623 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1624 return sizeof (struct srp_cmd);
1626 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1627 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1628 shost_printk(KERN_WARNING, target->scsi_host,
1629 PFX "Unhandled data direction %d\n",
1630 scmnd->sc_data_direction);
1634 nents = scsi_sg_count(scmnd);
1635 scat = scsi_sglist(scmnd);
1637 dev = target->srp_host->srp_dev;
1640 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1641 if (unlikely(count == 0))
/* Default to the direct descriptor format. */
1644 fmt = SRP_DATA_DESC_DIRECT;
1645 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1647 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1649 * The midlayer only generated a single gather/scatter
1650 * entry, or DMA mapping coalesced everything to a
1651 * single entry. So a direct descriptor along with
1652 * the DMA MR suffices.
1654 struct srp_direct_buf *buf = (void *) cmd->add_data;
1656 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1657 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1658 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1665 * We have more than one scatter/gather entry, so build our indirect
1666 * descriptor table, trying to merge as many entries as we can.
1668 indirect_hdr = (void *) cmd->add_data;
1670 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1671 target->indirect_size, DMA_TO_DEVICE);
1673 memset(&state, 0, sizeof(state));
1674 state.desc = req->indirect_desc;
/* Choose the registration strategy configured for this device. */
1675 if (dev->use_fast_reg)
1676 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1677 else if (dev->use_fmr)
1678 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1680 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1681 req->nmdesc = state.nmdesc;
1685 #if defined(DYNAMIC_DEBUG)
1687 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1688 "Memory mapping consistency check");
1689 if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
1690 srp_check_mapping(&state, ch, req, scat, count);
1694 /* We've mapped the request, now pull as much of the indirect
1695 * descriptor table as we can into the command buffer. If this
1696 * target is not using an external indirect table, we are
1697 * guaranteed to fit into the command, as the SCSI layer won't
1698 * give us more S/G entries than we allow.
1700 if (state.ndesc == 1) {
1702 * Memory registration collapsed the sg-list into one entry,
1703 * so use a direct descriptor.
1705 struct srp_direct_buf *buf = (void *) cmd->add_data;
1707 *buf = req->indirect_desc[0];
1711 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1712 !target->allow_ext_sg)) {
1713 shost_printk(KERN_ERR, target->scsi_host,
1714 "Could not fit S/G list into SRP_CMD\n");
1719 count = min(state.ndesc, target->cmd_sg_cnt);
1720 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1721 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1723 fmt = SRP_DATA_DESC_INDIRECT;
1724 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1725 len += count * sizeof (struct srp_direct_buf);
1727 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1728 count * sizeof (struct srp_direct_buf));
/* Without a global rkey the indirect table itself must be registered. */
1730 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1731 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1732 idb_len, &idb_rkey);
1737 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1740 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1741 indirect_hdr->table_desc.key = idb_rkey;
1742 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1743 indirect_hdr->len = cpu_to_be32(state.total_len);
1745 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1746 cmd->data_out_desc_cnt = count;
1748 cmd->data_in_desc_cnt = count;
1750 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1754 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1755 cmd->buf_fmt = fmt << 4;
/* Error path: undo the mapping done above. */
1762 srp_unmap_data(scmnd, ch, req);
1763 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1769 * Return an IU and possible credit to the free pool
/*
 * Put @iu back on the channel's free_tx list under ch->lock.
 * NOTE(review): the elided line under the SRP_IU_RSP test presumably
 * restores a request-limit credit — confirm against the full file.
 */
1771 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1772 enum srp_iu_type iu_type)
1774 unsigned long flags;
1776 spin_lock_irqsave(&ch->lock, flags);
1777 list_add(&iu->list, &ch->free_tx);
/* Initiator responses to target requests do not consume credits. */
1778 if (iu_type != SRP_IU_RSP)
1780 spin_unlock_irqrestore(&ch->lock, flags);
1784 * Must be called with ch->lock held to protect req_lim and free_tx.
1785 * If IU is not sent, it must be returned using srp_put_tx_iu().
1788 * An upper limit for the number of allocated information units for each
1790 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1791 * more than Scsi_Host.can_queue requests.
1792 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1793 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1794 * one unanswered SRP request to an initiator.
/*
 * Take a TX IU off the free list, reserving SRP_TSK_MGMT_SQ_SIZE credits
 * for task management unless the request itself is task management.
 */
1796 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1797 enum srp_iu_type iu_type)
1799 struct srp_target_port *target = ch->target;
/* Task management may dip into the reserved credits; others may not. */
1800 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
/* Reap send completions synchronously to refill free_tx. */
1803 ib_process_cq_direct(ch->send_cq, -1);
1805 if (list_empty(&ch->free_tx))
1808 /* Initiator responses to target requests do not consume credits */
1809 if (iu_type != SRP_IU_RSP) {
1810 if (ch->req_lim <= rsv) {
/* Statistic: number of times we ran out of credits. */
1811 ++target->zero_req_lim;
1818 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1819 list_del(&iu->list);
/*
 * Send completion handler: on success return the IU to free_tx; on error
 * delegate to the common QP error handler.
 */
1823 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1825 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1826 struct srp_rdma_ch *ch = cq->cq_context;
1828 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1829 srp_handle_qp_err(cq, wc, "SEND");
1833 list_add(&iu->list, &ch->free_tx);
/*
 * Post @iu as a single-SGE signaled send of @len bytes on the channel QP.
 * NOTE(review): list.length and wr.num_sge/sg_list/next assignments are
 * elided from this excerpt.
 */
1836 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1838 struct srp_target_port *target = ch->target;
1840 struct ib_send_wr wr, *bad_wr;
1842 list.addr = iu->dma;
1844 list.lkey = target->lkey;
1846 iu->cqe.done = srp_send_done;
1849 wr.wr_cqe = &iu->cqe;
1852 wr.opcode = IB_WR_SEND;
1853 wr.send_flags = IB_SEND_SIGNALED;
1855 return ib_post_send(ch->qp, &wr, &bad_wr);
/*
 * Post @iu as a single-SGE receive work request on the channel QP.
 */
1858 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1860 struct srp_target_port *target = ch->target;
1861 struct ib_recv_wr wr, *bad_wr;
1864 list.addr = iu->dma;
1865 list.length = iu->size;
1866 list.lkey = target->lkey;
1868 iu->cqe.done = srp_recv_done;
1871 wr.wr_cqe = &iu->cqe;
1875 return ib_post_recv(ch->qp, &wr, &bad_wr);
/*
 * Handle an SRP_RSP IU: either complete an outstanding task-management
 * request (tag has SRP_TAG_TSK_MGMT set) or find the matching SCSI
 * command, copy status/sense/residual into it and complete it.  Also
 * credits req_lim_delta back to the channel in both paths.
 */
1878 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1880 struct srp_target_port *target = ch->target;
1881 struct srp_request *req;
1882 struct scsi_cmnd *scmnd;
1883 unsigned long flags;
1885 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1886 spin_lock_irqsave(&ch->lock, flags);
1887 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1888 if (rsp->tag == ch->tsk_mgmt_tag) {
/* -1 means "no status byte in the response". */
1889 ch->tsk_mgmt_status = -1;
1890 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1891 ch->tsk_mgmt_status = rsp->data[3];
1892 complete(&ch->tsk_mgmt_done);
1894 shost_printk(KERN_ERR, target->scsi_host,
1895 "Received tsk mgmt response too late for tag %#llx\n",
1898 spin_unlock_irqrestore(&ch->lock, flags);
1900 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1901 if (scmnd && scmnd->host_scribble) {
1902 req = (void *)scmnd->host_scribble;
/* Claim the request so abort/reset cannot race with us. */
1903 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1908 shost_printk(KERN_ERR, target->scsi_host,
1909 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1910 rsp->tag, ch - target->ch, ch->qp->qp_num);
1912 spin_lock_irqsave(&ch->lock, flags);
1913 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1914 spin_unlock_irqrestore(&ch->lock, flags);
1918 scmnd->result = rsp->status;
1920 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
/* Sense data follows the response data in the IU. */
1921 memcpy(scmnd->sense_buffer, rsp->data +
1922 be32_to_cpu(rsp->resp_data_len),
1923 min_t(int, be32_to_cpu(rsp->sense_data_len),
1924 SCSI_SENSE_BUFFERSIZE));
/* Translate under/over-run flags into a (signed) residual count. */
1927 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1928 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1929 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1930 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1931 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1932 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1933 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1934 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1936 srp_free_req(ch, req, scmnd,
1937 be32_to_cpu(rsp->req_lim_delta));
1939 scmnd->host_scribble = NULL;
1940 scmnd->scsi_done(scmnd);
/*
 * Common helper for sending an initiator response (CRED_RSP / AER_RSP):
 * credit @req_delta back, grab a SRP_IU_RSP IU, copy @rsp into it and
 * post it; returns the IU on send failure.
 */
1944 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1947 struct srp_target_port *target = ch->target;
1948 struct ib_device *dev = target->srp_host->srp_dev->dev;
1949 unsigned long flags;
1953 spin_lock_irqsave(&ch->lock, flags);
1954 ch->req_lim += req_delta;
1955 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1956 spin_unlock_irqrestore(&ch->lock, flags);
1959 shost_printk(KERN_ERR, target->scsi_host, PFX
1960 "no IU available to send response\n");
/* Bracket the CPU copy with DMA sync so the HCA sees the data. */
1964 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1965 memcpy(iu->buf, rsp, len);
1966 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1968 err = srp_post_send(ch, iu, len);
1970 shost_printk(KERN_ERR, target->scsi_host, PFX
1971 "unable to post response: %d\n", err);
1972 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
/*
 * Handle an SRP_CRED_REQ from the target by answering with an
 * SRP_CRED_RSP carrying the same tag (elided initializer lines set it).
 */
1978 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1979 struct srp_cred_req *req)
1981 struct srp_cred_rsp rsp = {
1982 .opcode = SRP_CRED_RSP,
1985 s32 delta = be32_to_cpu(req->req_lim_delta);
1987 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1988 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1989 "problems processing SRP_CRED_REQ\n");
/*
 * Handle an SRP_AER_REQ (asynchronous event report): log and ignore the
 * event itself, but still answer with an SRP_AER_RSP so credits flow.
 */
1992 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1993 struct srp_aer_req *req)
1995 struct srp_target_port *target = ch->target;
1996 struct srp_aer_rsp rsp = {
1997 .opcode = SRP_AER_RSP,
2000 s32 delta = be32_to_cpu(req->req_lim_delta);
2002 shost_printk(KERN_ERR, target->scsi_host, PFX
2003 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2005 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2006 shost_printk(KERN_ERR, target->scsi_host, PFX
2007 "problems processing SRP_AER_REQ\n");
/*
 * Receive completion handler: dispatch the incoming IU by SRP opcode and
 * repost the receive buffer.  NOTE(review): the switch statement header
 * and case labels are elided from this excerpt; the dispatch targets are
 * visible below (RSP, CRED_REQ, AER_REQ, target logout, default).
 */
2010 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2012 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2013 struct srp_rdma_ch *ch = cq->cq_context;
2014 struct srp_target_port *target = ch->target;
2015 struct ib_device *dev = target->srp_host->srp_dev->dev;
2019 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2020 srp_handle_qp_err(cq, wc, "RECV");
/* Make the DMA'd IU contents visible to the CPU before parsing. */
2024 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
/* First byte of every SRP IU is the opcode. */
2027 opcode = *(u8 *) iu->buf;
2030 shost_printk(KERN_ERR, target->scsi_host,
2031 PFX "recv completion, opcode 0x%02x\n", opcode);
2032 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2033 iu->buf, wc->byte_len, true);
2038 srp_process_rsp(ch, iu->buf);
2042 srp_process_cred_req(ch, iu->buf);
2046 srp_process_aer_req(ch, iu->buf);
2050 /* XXX Handle target logout */
2051 shost_printk(KERN_WARNING, target->scsi_host,
2052 PFX "Got target logout request\n");
2056 shost_printk(KERN_WARNING, target->scsi_host,
2057 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2061 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
/* Recycle the buffer for the next incoming IU. */
2064 res = srp_post_recv(ch, iu);
2066 shost_printk(KERN_ERR, target->scsi_host,
2067 PFX "Recv failed with error code %d\n", res);
2071 * srp_tl_err_work() - handle a transport layer error
2072 * @work: Work structure embedded in an SRP target port.
2074 * Note: This function may get invoked before the rport has been created,
2075 * hence the target->rport test.
2077 static void srp_tl_err_work(struct work_struct *work)
2079 struct srp_target_port *target;
2081 target = container_of(work, struct srp_target_port, tl_err_work);
/* Kick off the SRP transport-class fast-fail/dev-loss timers. */
2083 srp_start_tl_fail_timers(target->rport);
/*
 * Common handler for QP-related completion errors: log once per failure
 * episode, schedule transport-error handling, and mark the QP as broken.
 */
2086 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2089 struct srp_rdma_ch *ch = cq->cq_context;
2090 struct srp_target_port *target = ch->target;
/* Only report/queue work for the first error on a live connection. */
2092 if (ch->connected && !target->qp_in_error) {
2093 shost_printk(KERN_ERR, target->scsi_host,
2094 PFX "failed %s status %s (%d) for CQE %p\n",
2095 opname, ib_wc_status_msg(wc->status), wc->status,
2097 queue_work(system_long_wq, &target->tl_err_work);
2099 target->qp_in_error = true;
/*
 * SCSI host template .queuecommand: pick the RDMA channel from the blk-mq
 * hardware-queue part of the unique tag, grab a TX IU, build the SRP_CMD
 * IU, map the data buffer and post the send.
 *
 * NOTE(review): several lines are elided here (the in_scsi_eh unlock
 * pairing, cmd->tag assignment, goto labels and the final return), so the
 * visible error-path ordering is incomplete.
 */
2102 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2104 struct srp_target_port *target = host_to_target(shost);
2105 struct srp_rport *rport = target->rport;
2106 struct srp_rdma_ch *ch;
2107 struct srp_request *req;
2109 struct srp_cmd *cmd;
2110 struct ib_device *dev;
2111 unsigned long flags;
2115 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2118 * The SCSI EH thread is the only context from which srp_queuecommand()
2119 * can get invoked for blocked devices (SDEV_BLOCK /
2120 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2121 * locking the rport mutex if invoked from inside the SCSI EH.
2124 mutex_lock(&rport->mutex);
2126 scmnd->result = srp_chkready(target->rport);
2127 if (unlikely(scmnd->result))
2130 WARN_ON_ONCE(scmnd->request->tag < 0);
/* hwq part selects the channel, tag part indexes the request ring. */
2131 tag = blk_mq_unique_tag(scmnd->request);
2132 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2133 idx = blk_mq_unique_tag_to_tag(tag);
2134 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2135 dev_name(&shost->shost_gendev), tag, idx,
2136 target->req_ring_size);
2138 spin_lock_irqsave(&ch->lock, flags);
2139 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2140 spin_unlock_irqrestore(&ch->lock, flags);
2145 req = &ch->req_ring[idx];
2146 dev = target->srp_host->srp_dev->dev;
2147 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
/* Link the request back from the command for later claim/abort. */
2150 scmnd->host_scribble = (void *) req;
2153 memset(cmd, 0, sizeof *cmd);
2155 cmd->opcode = SRP_CMD;
2156 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2158 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2163 len = srp_map_data(scmnd, ch, req);
2165 shost_printk(KERN_ERR, target->scsi_host,
2166 PFX "Failed to map data (%d)\n", len);
2168 * If we ran out of memory descriptors (-ENOMEM) because an
2169 * application is queuing many requests with more than
2170 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2171 * to reduce queue depth temporarily.
2173 scmnd->result = len == -ENOMEM ?
2174 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2178 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2181 if (srp_post_send(ch, iu, len)) {
2182 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2183 scmnd->result = DID_ERROR << 16;
2191 mutex_unlock(&rport->mutex);
/* Error unwinding: release the mapping and the TX IU. */
2196 srp_unmap_data(scmnd, ch, req);
2199 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2202 * Avoid that the loops that iterate over the request ring can
2203 * encounter a dangling SCSI command pointer.
2208 if (scmnd->result) {
2209 scmnd->scsi_done(scmnd);
2212 ret = SCSI_MLQUEUE_HOST_BUSY;
2219 * Note: the resources allocated in this function are freed in
/*
 * Allocate the RX and TX IU rings for one RDMA channel and put all TX IUs
 * on the free list.  The visible tail is the allocation-failure unwind.
 */
2222 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2224 struct srp_target_port *target = ch->target;
2227 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2231 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2236 for (i = 0; i < target->queue_size; ++i) {
2237 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2239 GFP_KERNEL, DMA_FROM_DEVICE);
2240 if (!ch->rx_ring[i])
2244 for (i = 0; i < target->queue_size; ++i) {
2245 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2247 GFP_KERNEL, DMA_TO_DEVICE);
2248 if (!ch->tx_ring[i])
/* Every allocated TX IU starts out on the free list. */
2251 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
/* Error path: free whatever was allocated before the failure. */
2257 for (i = 0; i < target->queue_size; ++i) {
2258 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2259 srp_free_iu(target->srp_host, ch->tx_ring[i]);
/*
 * Derive a block-layer request timeout (in jiffies) from the QP's local
 * ACK timeout and retry count, with one second of slack on top of the
 * worst-case error-completion latency.
 */
2272 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2274 uint64_t T_tr_ns, max_compl_time_ms;
2275 uint32_t rq_tmo_jiffies;
2278 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2279 * table 91), both the QP timeout and the retry count have to be set
2280 * for RC QP's during the RTR to RTS transition.
2282 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2283 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2286 * Set target->rq_tmo_jiffies to one second more than the largest time
2287 * it can take before an error completion is generated. See also
2288 * C9-140..142 in the IBTA spec for more information about how to
2289 * convert the QP Local ACK Timeout value to nanoseconds.
2291 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2292 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2293 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2294 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2296 return rq_tmo_jiffies;
/*
 * Process a CM REP carrying the SRP login response: record the target's
 * limits, allocate IU rings, drive the QP through RTR and RTS, post all
 * receives and send the CM RTU.
 *
 * NOTE(review): the error gotos / labels and the final ch->status
 * assignment are elided from this excerpt.
 */
2299 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2300 const struct srp_login_rsp *lrsp,
2301 struct srp_rdma_ch *ch)
2303 struct srp_target_port *target = ch->target;
2304 struct ib_qp_attr *qp_attr = NULL;
2309 if (lrsp->opcode == SRP_LOGIN_RSP) {
2310 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2311 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2314 * Reserve credits for task management so we don't
2315 * bounce requests back to the SCSI mid-layer.
2317 target->scsi_host->can_queue
2318 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2319 target->scsi_host->can_queue)
2320 target->scsi_host->cmd_per_lun
2321 = min_t(int, target->scsi_host->can_queue,
2322 target->scsi_host->cmd_per_lun);
2324 shost_printk(KERN_WARNING, target->scsi_host,
2325 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2331 ret = srp_alloc_iu_bufs(ch);
2337 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
/* RTR transition: attributes come from the CM. */
2341 qp_attr->qp_state = IB_QPS_RTR;
2342 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2346 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
/* Pre-post one receive per ring slot before going RTS. */
2350 for (i = 0; i < target->queue_size; i++) {
2351 struct srp_iu *iu = ch->rx_ring[i];
2353 ret = srp_post_recv(ch, iu);
2358 qp_attr->qp_state = IB_QPS_RTS;
2359 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2363 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2365 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2369 ret = ib_send_cm_rtu(cm_id, NULL, 0);
/*
 * Decode a CM REJ into a channel status: handle CM/port redirects,
 * duplicate-comm-id, SRP-specific consumer-defined rejects (login
 * rejected), stale connections, and everything else as -ECONNRESET.
 */
2378 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2379 struct ib_cm_event *event,
2380 struct srp_rdma_ch *ch)
2382 struct srp_target_port *target = ch->target;
2383 struct Scsi_Host *shost = target->scsi_host;
2384 struct ib_class_port_info *cpi;
2387 switch (event->param.rej_rcvd.reason) {
2388 case IB_CM_REJ_PORT_CM_REDIRECT:
/* ARI carries class port info pointing at the new CM port. */
2389 cpi = event->param.rej_rcvd.ari;
2390 ch->path.dlid = cpi->redirect_lid;
2391 ch->path.pkey = cpi->redirect_pkey;
2392 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2393 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2395 ch->status = ch->path.dlid ?
2396 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2399 case IB_CM_REJ_PORT_REDIRECT:
2400 if (srp_target_is_topspin(target)) {
2402 * Topspin/Cisco SRP gateways incorrectly send
2403 * reject reason code 25 when they mean 24
2406 memcpy(ch->path.dgid.raw,
2407 event->param.rej_rcvd.ari, 16);
2409 shost_printk(KERN_DEBUG, shost,
2410 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2411 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2412 be64_to_cpu(ch->path.dgid.global.interface_id));
2414 ch->status = SRP_PORT_REDIRECT;
2416 shost_printk(KERN_WARNING, shost,
2417 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2418 ch->status = -ECONNRESET;
2422 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2423 shost_printk(KERN_WARNING, shost,
2424 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2425 ch->status = -ECONNRESET;
2428 case IB_CM_REJ_CONSUMER_DEFINED:
/* Private data may carry an SRP login-reject IU with a reason code. */
2429 opcode = *(u8 *) event->private_data;
2430 if (opcode == SRP_LOGIN_REJ) {
2431 struct srp_login_rej *rej = event->private_data;
2432 u32 reason = be32_to_cpu(rej->reason);
2434 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2435 shost_printk(KERN_WARNING, shost,
2436 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2438 shost_printk(KERN_WARNING, shost, PFX
2439 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2441 target->orig_dgid.raw, reason);
2443 shost_printk(KERN_WARNING, shost,
2444 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2445 " opcode 0x%02x\n", opcode);
2446 ch->status = -ECONNRESET;
2449 case IB_CM_REJ_STALE_CONN:
2450 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2451 ch->status = SRP_STALE_CONN;
2455 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2456 event->param.rej_rcvd.reason);
2457 ch->status = -ECONNRESET;
/*
 * Top-level IB CM event dispatcher for a channel.  Events that end the
 * connection-establishment phase complete &ch->done (visible at the
 * bottom); the elided lines include the comp flag management and return.
 */
2461 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2463 struct srp_rdma_ch *ch = cm_id->context;
2464 struct srp_target_port *target = ch->target;
2467 switch (event->event) {
2468 case IB_CM_REQ_ERROR:
2469 shost_printk(KERN_DEBUG, target->scsi_host,
2470 PFX "Sending CM REQ failed\n");
2472 ch->status = -ECONNRESET;
2475 case IB_CM_REP_RECEIVED:
2477 srp_cm_rep_handler(cm_id, event->private_data, ch);
2480 case IB_CM_REJ_RECEIVED:
2481 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2484 srp_cm_rej_handler(cm_id, event, ch);
2487 case IB_CM_DREQ_RECEIVED:
2488 shost_printk(KERN_WARNING, target->scsi_host,
2489 PFX "DREQ received - connection closed\n");
2490 ch->connected = false;
2491 if (ib_send_cm_drep(cm_id, NULL, 0))
2492 shost_printk(KERN_ERR, target->scsi_host,
2493 PFX "Sending CM DREP failed\n");
/* Target-initiated disconnect: treat as a transport error. */
2494 queue_work(system_long_wq, &target->tl_err_work);
2497 case IB_CM_TIMEWAIT_EXIT:
2498 shost_printk(KERN_ERR, target->scsi_host,
2499 PFX "connection closed\n");
/* These events need no action beyond the common handling. */
2505 case IB_CM_MRA_RECEIVED:
2506 case IB_CM_DREQ_ERROR:
2507 case IB_CM_DREP_RECEIVED:
2511 shost_printk(KERN_WARNING, target->scsi_host,
2512 PFX "Unhandled CM event %d\n", event->event);
2517 complete(&ch->done);
2523 * srp_change_queue_depth - setting device queue depth
2524 * @sdev: scsi device struct
2525 * @qdepth: requested queue depth
2527 * Returns queue depth.
/* NOTE(review): the elided line under the tagged_supported test presumably
 * clamps qdepth to 1 — confirm against the full file. */
2530 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2532 if (!sdev->tagged_supported)
2534 return scsi_change_queue_depth(sdev, qdepth);
/*
 * Send an SRP task-management IU (@func, e.g. abort or LUN reset) for
 * @req_tag/@lun and wait up to SRP_ABORT_TIMEOUT_MS for the response;
 * the TM status byte is returned through @status when available.
 * Returns 0 on success, -1 on failure/timeout.
 */
2537 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2538 u8 func, u8 *status)
2540 struct srp_target_port *target = ch->target;
2541 struct srp_rport *rport = target->rport;
2542 struct ib_device *dev = target->srp_host->srp_dev->dev;
2544 struct srp_tsk_mgmt *tsk_mgmt;
/* No point sending TM on a dead or never-established connection. */
2547 if (!ch->connected || target->qp_in_error)
2551 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2552 * invoked while a task management function is being sent.
2554 mutex_lock(&rport->mutex);
2555 spin_lock_irq(&ch->lock);
2556 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2557 spin_unlock_irq(&ch->lock);
2560 mutex_unlock(&rport->mutex);
2565 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2568 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2570 tsk_mgmt->opcode = SRP_TSK_MGMT;
2571 int_to_scsilun(lun, &tsk_mgmt->lun);
2572 tsk_mgmt->tsk_mgmt_func = func;
2573 tsk_mgmt->task_tag = req_tag;
/* Generate a fresh TM tag under ch->lock; SRP_TAG_TSK_MGMT marks it
 * so srp_process_rsp() can tell TM responses from command responses. */
2575 spin_lock_irq(&ch->lock);
2576 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2577 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2578 spin_unlock_irq(&ch->lock);
2580 init_completion(&ch->tsk_mgmt_done);
2582 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2584 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2585 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2586 mutex_unlock(&rport->mutex);
2590 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2591 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2592 if (res > 0 && status)
2593 *status = ch->tsk_mgmt_status;
2594 mutex_unlock(&rport->mutex);
2596 WARN_ON_ONCE(res < 0);
2598 return res > 0 ? 0 : -1;
/*
 * SCSI error-handler abort callback: claim the request, send an
 * SRP ABORT TASK TM for its tag, and on success complete the command
 * with DID_ABORT.
 *
 * NOTE(review): the initial "if (!req)" guard and the SUCCESS/FAILED/
 * FAST_IO_FAIL assignments are elided from this excerpt.
 */
2601 static int srp_abort(struct scsi_cmnd *scmnd)
2603 struct srp_target_port *target = host_to_target(scmnd->device->host);
2604 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2607 struct srp_rdma_ch *ch;
2610 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
/* Recover the channel from the blk-mq hardware-queue part of the tag. */
2614 tag = blk_mq_unique_tag(scmnd->request);
2615 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2616 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2618 ch = &target->ch[ch_idx];
2619 if (!srp_claim_req(ch, req, NULL, scmnd))
2621 shost_printk(KERN_ERR, target->scsi_host,
2622 "Sending SRP abort for tag %#x\n", tag);
2623 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2624 SRP_TSK_ABORT_TASK, NULL) == 0)
2626 else if (target->rport->state == SRP_RPORT_LOST)
2630 if (ret == SUCCESS) {
2631 srp_free_req(ch, req, scmnd, 0);
2632 scmnd->result = DID_ABORT << 16;
2633 scmnd->scsi_done(scmnd);
/*
 * SCSI error-handler device-reset callback: issue an SRP LUN RESET TM on
 * the first channel.  The elided tail inspects @status and picks the
 * SUCCESS/FAILED return value.
 */
2639 static int srp_reset_device(struct scsi_cmnd *scmnd)
2641 struct srp_target_port *target = host_to_target(scmnd->device->host);
2642 struct srp_rdma_ch *ch;
2645 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2647 ch = &target->ch[0];
2648 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2649 SRP_TSK_LUN_RESET, &status))
/*
 * SCSI error-handler host-reset callback: delegate to the SRP transport
 * class, which tears down and reconnects the rport.
 */
2657 static int srp_reset_host(struct scsi_cmnd *scmnd)
2659 struct srp_target_port *target = host_to_target(scmnd->device->host);
2661 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2663 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
/*
 * .slave_alloc: constrain the block layer so no SG element straddles an
 * MR page boundary, matching the registration granularity used above.
 */
2666 static int srp_slave_alloc(struct scsi_device *sdev)
2668 struct Scsi_Host *shost = sdev->host;
2669 struct srp_target_port *target = host_to_target(shost);
2670 struct srp_device *srp_dev = target->srp_host->srp_dev;
2673 blk_queue_virt_boundary(sdev->request_queue,
2674 ~srp_dev->mr_page_mask);
/*
 * .slave_configure: for disks, raise the block-layer request timeout to
 * at least 30 seconds or the QP-derived rq_tmo_jiffies, whichever is
 * larger.
 */
2679 static int srp_slave_configure(struct scsi_device *sdev)
2681 struct Scsi_Host *shost = sdev->host;
2682 struct srp_target_port *target = host_to_target(shost);
2683 struct request_queue *q = sdev->request_queue;
2684 unsigned long timeout;
2686 if (sdev->type == TYPE_DISK) {
2687 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2688 blk_queue_rq_timeout(q, timeout);
/* sysfs: print the target's SRP identifier extension as 16 hex digits. */
2694 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2697 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2699 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
/* sysfs: print the target's I/O controller GUID as 16 hex digits. */
2702 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2705 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2707 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
/* sysfs: print the target's SRP service ID as 16 hex digits. */
2710 static ssize_t show_service_id(struct device *dev,
2711 struct device_attribute *attr, char *buf)
2713 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2715 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
/* sysfs: print the IB partition key (P_Key) used for this connection. */
2718 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
/* sysfs: print the source (local) GID in IPv6 notation. */
2726 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2729 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2731 return sprintf(buf, "%pI6\n", target->sgid.raw);
/* sysfs: print the current destination GID of channel 0's path. */
2734 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2737 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2738 struct srp_rdma_ch *ch = &target->ch[0];
2740 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
/* sysfs: print the destination GID the user originally specified. */
2743 static ssize_t show_orig_dgid(struct device *dev,
2744 struct device_attribute *attr, char *buf)
2746 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2748 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
/*
 * sysfs: print the smallest SRP request-limit credit count across all
 * RDMA channels (the effective flow-control limit of the target port).
 */
2751 static ssize_t show_req_lim(struct device *dev,
2752 struct device_attribute *attr, char *buf)
2754 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2755 struct srp_rdma_ch *ch;
2756 int i, req_lim = INT_MAX;
2758 for (i = 0; i < target->ch_count; i++) {
2759 ch = &target->ch[i];
2760 req_lim = min(req_lim, ch->req_lim);
2762 return sprintf(buf, "%d\n", req_lim);
/* sysfs: print how often the request limit dropped to zero (a stall counter). */
2765 static ssize_t show_zero_req_lim(struct device *dev,
2766 struct device_attribute *attr, char *buf)
2768 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2770 return sprintf(buf, "%d\n", target->zero_req_lim);
/* sysfs: print the local IB port number this target is reached through. */
2773 static ssize_t show_local_ib_port(struct device *dev,
2774 struct device_attribute *attr, char *buf)
2776 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2778 return sprintf(buf, "%d\n", target->srp_host->port);
/* sysfs: print the name of the local IB device (HCA). */
2781 static ssize_t show_local_ib_device(struct device *dev,
2782 struct device_attribute *attr, char *buf)
2784 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2786 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
/* sysfs: print the number of RDMA channels used by this target port. */
2789 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2792 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2794 return sprintf(buf, "%d\n", target->ch_count);
/* sysfs: print the completion vector configured for this target. */
2797 static ssize_t show_comp_vector(struct device *dev,
2798 struct device_attribute *attr, char *buf)
2800 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2802 return sprintf(buf, "%d\n", target->comp_vector);
/* sysfs: print the IB transport-layer retry count in use. */
2805 static ssize_t show_tl_retry_count(struct device *dev,
2806 struct device_attribute *attr, char *buf)
2808 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2810 return sprintf(buf, "%d\n", target->tl_retry_count);
/* sysfs: print the number of in-IU scatter/gather entries per command. */
2813 static ssize_t show_cmd_sg_entries(struct device *dev,
2814 struct device_attribute *attr, char *buf)
2816 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2818 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
/* sysfs: print whether external indirect SG descriptor lists are allowed. */
2821 static ssize_t show_allow_ext_sg(struct device *dev,
2822 struct device_attribute *attr, char *buf)
2824 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2826 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
/* Read-only sysfs attributes, each backed by the show_* helper above. */
2829 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2830 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2831 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2832 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2833 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2834 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2835 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2836 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2837 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2838 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2839 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2840 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2841 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2842 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2843 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2844 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
/*
 * Attribute table handed to the SCSI host template (.shost_attrs).
 * NOTE(review): listing is fragmentary - some entries are not shown.
 */
2846 static struct device_attribute *srp_host_attrs[] = {
2849 &dev_attr_service_id,
2853 &dev_attr_orig_dgid,
2855 &dev_attr_zero_req_lim,
2856 &dev_attr_local_ib_port,
2857 &dev_attr_local_ib_device,
2859 &dev_attr_comp_vector,
2860 &dev_attr_tl_retry_count,
2861 &dev_attr_cmd_sg_entries,
2862 &dev_attr_allow_ext_sg,
/*
 * SCSI host template: wires the SRP initiator's queuecommand, error
 * handlers and sysfs attributes into the SCSI mid-layer.
 */
2866 static struct scsi_host_template srp_template = {
2867 .module = THIS_MODULE,
2868 .name = "InfiniBand SRP initiator",
2869 .proc_name = DRV_NAME,
2870 .slave_alloc = srp_slave_alloc,
2871 .slave_configure = srp_slave_configure,
2872 .info = srp_target_info,
2873 .queuecommand = srp_queuecommand,
2874 .change_queue_depth = srp_change_queue_depth,
2875 .eh_abort_handler = srp_abort,
2876 .eh_device_reset_handler = srp_reset_device,
2877 .eh_host_reset_handler = srp_reset_host,
2878 .skip_settle_delay = true,
2879 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2880 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2882 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2883 .use_clustering = ENABLE_CLUSTERING,
2884 .shost_attrs = srp_host_attrs,
2885 .track_queue_depth = 1,
/* Count the SCSI devices (LUNs) currently attached to @host. */
2888 static int srp_sdev_count(struct Scsi_Host *host)
2890 struct scsi_device *sdev;
2893 shost_for_each_device(sdev, host)
2901 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2902 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2903 * removal has been scheduled.
2904 * 0 and target->state != SRP_TARGET_REMOVED upon success.
/*
 * srp_add_target() - register a new SRP target port with the SCSI layer.
 *
 * Registers the Scsi_Host, creates the SRP transport rport, links the
 * target into the host's target list, and kicks off the initial SCSI
 * scan.  If the scan finds fewer connected channels than expected (or a
 * QP error occurred), the target is scheduled for removal instead.
 * Finally the target transitions SCANNING -> LIVE under target->lock.
 *
 * NOTE(review): fragmentary listing - some lines are missing.
 */
2906 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2908 struct srp_rport_identifiers ids;
2909 struct srp_rport *rport;
2911 target->state = SRP_TARGET_SCANNING;
2912 sprintf(target->target_name, "SRP.T10:%016llX",
2913 be64_to_cpu(target->id_ext));
2915 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
/* rport identity = 64-bit id_ext followed by 64-bit ioc_guid. */
2918 memcpy(ids.port_id, &target->id_ext, 8);
2919 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2920 ids.roles = SRP_RPORT_ROLE_TARGET;
2921 rport = srp_rport_add(target->scsi_host, &ids);
2922 if (IS_ERR(rport)) {
2923 scsi_remove_host(target->scsi_host);
2924 return PTR_ERR(rport);
2927 rport->lld_data = target;
2928 target->rport = rport;
2930 spin_lock(&host->target_lock);
2931 list_add_tail(&target->list, &host->target_list);
2932 spin_unlock(&host->target_lock);
2934 scsi_scan_target(&target->scsi_host->shost_gendev,
2935 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
/* If channels dropped or erred during the scan, tear the host down. */
2937 if (srp_connected_ch(target) < target->ch_count ||
2938 target->qp_in_error) {
2939 shost_printk(KERN_INFO, target->scsi_host,
2940 PFX "SCSI scan failed - removing SCSI host\n");
2941 srp_queue_remove_work(target);
2945 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2946 dev_name(&target->scsi_host->shost_gendev),
2947 srp_sdev_count(target->scsi_host));
/* Only promote to LIVE if nobody moved us out of SCANNING meanwhile. */
2949 spin_lock_irq(&target->lock);
2950 if (target->state == SRP_TARGET_SCANNING)
2951 target->state = SRP_TARGET_LIVE;
2952 spin_unlock_irq(&target->lock);
/*
 * Device-release callback for srp_class devices: signals srp_remove_one()
 * (via host->released) that the sysfs entry is finally gone.
 */
2958 static void srp_release_dev(struct device *dev)
2960 struct srp_host *host =
2961 container_of(dev, struct srp_host, dev);
2963 complete(&host->released);
/* Device class for the per-port "srp-<dev>-<port>" sysfs entries. */
2966 static struct class srp_class = {
2967 .name = "infiniband_srp",
2968 .dev_release = srp_release_dev
2972 * srp_conn_unique() - check whether the connection to a target is unique
2974 * @target: SRP target port.
/*
 * Return false if @host already has a (non-removed) target with the same
 * id_ext/ioc_guid/initiator_ext triple; walks host->target_list under
 * host->target_lock.
 */
2976 static bool srp_conn_unique(struct srp_host *host,
2977 struct srp_target_port *target)
2979 struct srp_target_port *t;
/* A target being removed does not count as a conflicting connection. */
2982 if (target->state == SRP_TARGET_REMOVED)
2987 spin_lock(&host->target_lock);
2988 list_for_each_entry(t, &host->target_list, list) {
2990 target->id_ext == t->id_ext &&
2991 target->ioc_guid == t->ioc_guid &&
2992 target->initiator_ext == t->initiator_ext) {
2997 spin_unlock(&host->target_lock);
3004 * Target ports are added by writing
3006 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3007 * pkey=<P_Key>,service_id=<service ID>
3009 * to the add_target sysfs attribute.
/*
 * Bit flags for the add_target option parser; SRP_OPT_ALL is the set of
 * mandatory options that must all be present in a target-creation string.
 */
3013 SRP_OPT_ID_EXT = 1 << 0,
3014 SRP_OPT_IOC_GUID = 1 << 1,
3015 SRP_OPT_DGID = 1 << 2,
3016 SRP_OPT_PKEY = 1 << 3,
3017 SRP_OPT_SERVICE_ID = 1 << 4,
3018 SRP_OPT_MAX_SECT = 1 << 5,
3019 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3020 SRP_OPT_IO_CLASS = 1 << 7,
3021 SRP_OPT_INITIATOR_EXT = 1 << 8,
3022 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3023 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3024 SRP_OPT_SG_TABLESIZE = 1 << 11,
3025 SRP_OPT_COMP_VECTOR = 1 << 12,
3026 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3027 SRP_OPT_QUEUE_SIZE = 1 << 14,
3028 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3032 SRP_OPT_SERVICE_ID),
/* match_token() patterns mapping option keywords to SRP_OPT_* flags. */
3035 static const match_table_t srp_opt_tokens = {
3036 { SRP_OPT_ID_EXT, "id_ext=%s" },
3037 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3038 { SRP_OPT_DGID, "dgid=%s" },
3039 { SRP_OPT_PKEY, "pkey=%x" },
3040 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3041 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3042 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3043 { SRP_OPT_IO_CLASS, "io_class=%x" },
3044 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3045 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3046 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3047 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3048 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3049 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3050 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3051 { SRP_OPT_ERR, NULL }
/*
 * srp_parse_options() - parse an add_target option string into @target.
 *
 * Duplicates @buf, splits it on ',' / '\n', matches each token against
 * srp_opt_tokens and fills in the corresponding target fields.  After the
 * loop it verifies that all mandatory options (SRP_OPT_ALL) were seen and
 * warns about inconsistent cmd_per_lun/can_queue combinations.
 *
 * NOTE(review): fragmentary listing - error-path lines (kfree of strdup'd
 * strings, gotos, returns) are not all visible below.
 */
3054 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3056 char *options, *sep_opt;
3059 substring_t args[MAX_OPT_ARGS];
3065 options = kstrdup(buf, GFP_KERNEL);
3070 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3074 token = match_token(p, srp_opt_tokens, args);
3078 case SRP_OPT_ID_EXT:
3079 p = match_strdup(args);
3084 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3088 case SRP_OPT_IOC_GUID:
3089 p = match_strdup(args);
3094 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3099 p = match_strdup(args);
/* A GID is 16 bytes, written as exactly 32 hex characters. */
3104 if (strlen(p) != 32) {
3105 pr_warn("bad dest GID parameter '%s'\n", p);
/* Parse the GID two hex digits (one byte) at a time. */
3110 for (i = 0; i < 16; ++i) {
3111 strlcpy(dgid, p + i * 2, sizeof(dgid));
3112 if (sscanf(dgid, "%hhx",
3113 &target->orig_dgid.raw[i]) < 1) {
3123 if (match_hex(args, &token)) {
3124 pr_warn("bad P_Key parameter '%s'\n", p);
3127 target->pkey = cpu_to_be16(token);
3130 case SRP_OPT_SERVICE_ID:
3131 p = match_strdup(args);
3136 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3140 case SRP_OPT_MAX_SECT:
3141 if (match_int(args, &token)) {
3142 pr_warn("bad max sect parameter '%s'\n", p);
3145 target->scsi_host->max_sectors = token;
3148 case SRP_OPT_QUEUE_SIZE:
3149 if (match_int(args, &token) || token < 1) {
3150 pr_warn("bad queue_size parameter '%s'\n", p);
/* queue_size also reserves room for RSP and task-mgmt IUs. */
3153 target->scsi_host->can_queue = token;
3154 target->queue_size = token + SRP_RSP_SQ_SIZE +
3155 SRP_TSK_MGMT_SQ_SIZE;
3156 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3157 target->scsi_host->cmd_per_lun = token;
3160 case SRP_OPT_MAX_CMD_PER_LUN:
3161 if (match_int(args, &token) || token < 1) {
3162 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3166 target->scsi_host->cmd_per_lun = token;
3169 case SRP_OPT_IO_CLASS:
3170 if (match_hex(args, &token)) {
3171 pr_warn("bad IO class parameter '%s'\n", p);
/* Only the two IO classes defined by the SRP revisions are valid. */
3174 if (token != SRP_REV10_IB_IO_CLASS &&
3175 token != SRP_REV16A_IB_IO_CLASS) {
3176 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3177 token, SRP_REV10_IB_IO_CLASS,
3178 SRP_REV16A_IB_IO_CLASS);
3181 target->io_class = token;
3184 case SRP_OPT_INITIATOR_EXT:
3185 p = match_strdup(args);
3190 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3194 case SRP_OPT_CMD_SG_ENTRIES:
3195 if (match_int(args, &token) || token < 1 || token > 255) {
3196 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3200 target->cmd_sg_cnt = token;
3203 case SRP_OPT_ALLOW_EXT_SG:
3204 if (match_int(args, &token)) {
3205 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3208 target->allow_ext_sg = !!token;
3211 case SRP_OPT_SG_TABLESIZE:
3212 if (match_int(args, &token) || token < 1 ||
3213 token > SG_MAX_SEGMENTS) {
3214 pr_warn("bad max sg_tablesize parameter '%s'\n",
3218 target->sg_tablesize = token;
3221 case SRP_OPT_COMP_VECTOR:
3222 if (match_int(args, &token) || token < 0) {
3223 pr_warn("bad comp_vector parameter '%s'\n", p);
3226 target->comp_vector = token;
3229 case SRP_OPT_TL_RETRY_COUNT:
3230 if (match_int(args, &token) || token < 2 || token > 7) {
3231 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3235 target->tl_retry_count = token;
3239 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
/* All mandatory options present? Otherwise report each missing one. */
3245 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3248 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3249 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3250 !(srp_opt_tokens[i].token & opt_mask))
3251 pr_warn("target creation request is missing parameter '%s'\n",
3252 srp_opt_tokens[i].pattern);
3254 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3255 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3256 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3257 target->scsi_host->cmd_per_lun,
3258 target->scsi_host->can_queue);
/*
 * srp_create_target() - "add_target" sysfs store handler.
 *
 * Allocates a Scsi_Host plus srp_target_port, parses the user-supplied
 * option string, sizes the memory-registration pool, creates one RDMA
 * channel per online CPU (bounded by nodes and completion vectors),
 * connects each channel, and finally registers the target with the SCSI
 * layer via srp_add_target().
 *
 * NOTE(review): fragmentary listing - labels, returns and some error-path
 * lines between the numbered statements are not shown.
 */
3265 static ssize_t srp_create_target(struct device *dev,
3266 struct device_attribute *attr,
3267 const char *buf, size_t count)
3269 struct srp_host *host =
3270 container_of(dev, struct srp_host, dev);
3271 struct Scsi_Host *target_host;
3272 struct srp_target_port *target;
3273 struct srp_rdma_ch *ch;
3274 struct srp_device *srp_dev = host->srp_dev;
3275 struct ib_device *ibdev = srp_dev->dev;
3276 int ret, node_idx, node, cpu, i;
3277 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3278 bool multich = false;
3280 target_host = scsi_host_alloc(&srp_template,
3281 sizeof (struct srp_target_port));
3285 target_host->transportt = ib_srp_transport_template;
3286 target_host->max_channel = 0;
3287 target_host->max_id = 1;
3288 target_host->max_lun = -1LL;
3289 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3291 target = host_to_target(target_host);
/* Defaults; may be overridden by srp_parse_options() below. */
3293 target->io_class = SRP_REV16A_IB_IO_CLASS;
3294 target->scsi_host = target_host;
3295 target->srp_host = host;
3296 target->pd = host->srp_dev->pd;
3297 target->lkey = host->srp_dev->pd->local_dma_lkey;
3298 target->cmd_sg_cnt = cmd_sg_entries;
3299 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3300 target->allow_ext_sg = allow_ext_sg;
3301 target->tl_retry_count = 7;
3302 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3305 * Avoid that the SCSI host can be removed by srp_remove_target()
3306 * before this function returns.
3308 scsi_host_get(target->scsi_host);
3310 mutex_lock(&host->add_target_mutex);
3312 ret = srp_parse_options(buf, target);
3316 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
/* Refuse a second login to the same target/initiator identity. */
3318 if (!srp_conn_unique(target->srp_host, target)) {
3319 shost_printk(KERN_INFO, target->scsi_host,
3320 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3321 be64_to_cpu(target->id_ext),
3322 be64_to_cpu(target->ioc_guid),
3323 be64_to_cpu(target->initiator_ext));
3328 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3329 target->cmd_sg_cnt < target->sg_tablesize) {
3330 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n")
3331 target->sg_tablesize = target->cmd_sg_cnt;
3334 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3336 * FR and FMR can only map one HCA page per entry. If the
3337 * start address is not aligned on a HCA page boundary two
3338 * entries will be used for the head and the tail although
3339 * these two entries combined contain at most one HCA page of
3340 * data. Hence the "+ 1" in the calculation below.
3342 * The indirect data buffer descriptor is contiguous so the
3343 * memory for that buffer will only be registered if
3344 * register_always is true. Hence add one to mr_per_cmd if
3345 * register_always has been set.
3347 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3348 (ilog2(srp_dev->mr_page_size) - 9);
3349 mr_per_cmd = register_always +
3350 (target->scsi_host->max_sectors + 1 +
3351 max_sectors_per_mr - 1) / max_sectors_per_mr;
3352 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3353 target->scsi_host->max_sectors,
3354 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3355 max_sectors_per_mr, mr_per_cmd);
3358 target_host->sg_tablesize = target->sg_tablesize;
3359 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3360 target->mr_per_cmd = mr_per_cmd;
3361 target->indirect_size = target->sg_tablesize *
3362 sizeof (struct srp_direct_buf);
3363 target->max_iu_len = sizeof (struct srp_cmd) +
3364 sizeof (struct srp_indirect_buf) +
3365 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3367 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3368 INIT_WORK(&target->remove_work, srp_remove_work);
3369 spin_lock_init(&target->lock);
3370 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
/* Channel count: bounded by NUMA nodes, comp vectors and online CPUs. */
3375 target->ch_count = max_t(unsigned, num_online_nodes(),
3377 min(4 * num_online_nodes(),
3378 ibdev->num_comp_vectors),
3379 num_online_cpus()));
3380 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
/* Distribute channels and completion vectors evenly across NUMA nodes. */
3386 for_each_online_node(node) {
3387 const int ch_start = (node_idx * target->ch_count /
3388 num_online_nodes());
3389 const int ch_end = ((node_idx + 1) * target->ch_count /
3390 num_online_nodes());
3391 const int cv_start = node_idx * ibdev->num_comp_vectors /
3393 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3397 for_each_online_cpu(cpu) {
3398 if (cpu_to_node(cpu) != node)
3400 if (ch_start + cpu_idx >= ch_end)
3402 ch = &target->ch[ch_start + cpu_idx];
3403 ch->target = target;
3404 ch->comp_vector = cv_start == cv_end ? cv_start :
3405 cv_start + cpu_idx % (cv_end - cv_start);
3406 spin_lock_init(&ch->lock);
3407 INIT_LIST_HEAD(&ch->free_tx);
3408 ret = srp_new_cm_id(ch);
3410 goto err_disconnect;
3412 ret = srp_create_ch_ib(ch);
3414 goto err_disconnect;
3416 ret = srp_alloc_req_data(ch);
3418 goto err_disconnect;
3420 ret = srp_connect_ch(ch, multich);
3422 shost_printk(KERN_ERR, target->scsi_host,
3423 PFX "Connection %d/%d failed\n",
/* Failing the very first channel is fatal; a later failure just
 * truncates ch_count to the channels that did connect. */
3426 if (node_idx == 0 && cpu_idx == 0) {
3427 goto err_disconnect;
3429 srp_free_ch_ib(target, ch);
3430 srp_free_req_data(target, ch);
3431 target->ch_count = ch - target->ch;
3443 target->scsi_host->nr_hw_queues = target->ch_count;
3445 ret = srp_add_target(host, target);
3447 goto err_disconnect;
3449 if (target->state != SRP_TARGET_REMOVED) {
3450 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3451 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3452 be64_to_cpu(target->id_ext),
3453 be64_to_cpu(target->ioc_guid),
3454 be16_to_cpu(target->pkey),
3455 be64_to_cpu(target->service_id),
3456 target->sgid.raw, target->orig_dgid.raw);
3462 mutex_unlock(&host->add_target_mutex);
3464 scsi_host_put(target->scsi_host);
3466 scsi_host_put(target->scsi_host);
/* Error path: disconnect and free every channel created so far. */
3471 srp_disconnect_target(target);
3473 for (i = 0; i < target->ch_count; i++) {
3474 ch = &target->ch[i];
3475 srp_free_ch_ib(target, ch);
3476 srp_free_req_data(target, ch);
/* Write-only sysfs attribute used to create new target ports. */
3483 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
/* sysfs: print the IB device name for this srp_host. */
3485 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3488 struct srp_host *host = container_of(dev, struct srp_host, dev);
3490 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3493 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
/* sysfs: print the IB port number for this srp_host. */
3495 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3498 struct srp_host *host = container_of(dev, struct srp_host, dev);
3500 return sprintf(buf, "%d\n", host->port);
3503 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
/*
 * srp_add_port() - create the per-port srp_host and its sysfs files.
 *
 * Allocates and initializes a srp_host for (@device, @port), registers a
 * "srp-<dev>-<port>" class device and attaches the add_target/ibdev/port
 * attributes.  On registration failure the device is unregistered again.
 */
3505 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3507 struct srp_host *host;
3509 host = kzalloc(sizeof *host, GFP_KERNEL);
3513 INIT_LIST_HEAD(&host->target_list);
3514 spin_lock_init(&host->target_lock);
3515 init_completion(&host->released);
3516 mutex_init(&host->add_target_mutex);
3517 host->srp_dev = device;
3520 host->dev.class = &srp_class;
3521 host->dev.parent = device->dev->dma_device;
3522 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3524 if (device_register(&host->dev))
3526 if (device_create_file(&host->dev, &dev_attr_add_target))
3528 if (device_create_file(&host->dev, &dev_attr_ibdev))
3530 if (device_create_file(&host->dev, &dev_attr_port))
3536 device_unregister(&host->dev);
/*
 * srp_add_one() - IB client "add" callback, invoked per HCA.
 *
 * Probes the device's memory-registration capabilities (FMR vs fast
 * registration), computes MR page size and limits, allocates the PD, and
 * creates one srp_host per physical port.
 *
 * NOTE(review): fragmentary listing - some lines are missing.
 */
3544 static void srp_add_one(struct ib_device *device)
3546 struct srp_device *srp_dev;
3547 struct srp_host *host;
3548 int mr_page_shift, p;
3549 u64 max_pages_per_mr;
3550 unsigned int flags = 0;
3552 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3557 * Use the smallest page size supported by the HCA, down to a
3558 * minimum of 4096 bytes. We're unlikely to build large sglists
3559 * out of smaller entries.
3561 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
3562 srp_dev->mr_page_size = 1 << mr_page_shift;
3563 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3564 max_pages_per_mr = device->attrs.max_mr_size;
3565 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3566 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3567 device->attrs.max_mr_size, srp_dev->mr_page_size,
3568 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3569 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
/* FMR support requires all four FMR verbs to be implemented. */
3572 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3573 device->map_phys_fmr && device->unmap_fmr);
3574 srp_dev->has_fr = (device->attrs.device_cap_flags &
3575 IB_DEVICE_MEM_MGT_EXTENSIONS);
3576 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3577 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3578 } else if (!never_register &&
3579 device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
/* Prefer fast registration over FMR when both exist and prefer_fr set. */
3580 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3581 (!srp_dev->has_fmr || prefer_fr));
3582 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
/* Without per-command registration we fall back to the global rkey. */
3585 if (never_register || !register_always ||
3586 (!srp_dev->has_fmr && !srp_dev->has_fr))
3587 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3589 if (srp_dev->use_fast_reg) {
3590 srp_dev->max_pages_per_mr =
3591 min_t(u32, srp_dev->max_pages_per_mr,
3592 device->attrs.max_fast_reg_page_list_len);
3594 srp_dev->mr_max_size = srp_dev->mr_page_size *
3595 srp_dev->max_pages_per_mr;
3596 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3597 device->name, mr_page_shift, device->attrs.max_mr_size,
3598 device->attrs.max_fast_reg_page_list_len,
3599 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3601 INIT_LIST_HEAD(&srp_dev->dev_list);
3603 srp_dev->dev = device;
3604 srp_dev->pd = ib_alloc_pd(device, flags);
3605 if (IS_ERR(srp_dev->pd))
/* One srp_host (and sysfs entry) per physical HCA port. */
3609 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3610 host = srp_add_port(srp_dev, p);
3612 list_add_tail(&host->list, &srp_dev->dev_list);
3615 ib_set_client_data(device, &srp_client, srp_dev);
/*
 * srp_remove_one() - IB client "remove" callback.
 *
 * For every port host: unregister its sysfs device, wait until the entry
 * is truly gone (so no new targets can be created), queue removal of all
 * existing targets, then flush srp_remove_wq so the removals complete.
 * Finally the PD is deallocated.
 */
3622 static void srp_remove_one(struct ib_device *device, void *client_data)
3624 struct srp_device *srp_dev;
3625 struct srp_host *host, *tmp_host;
3626 struct srp_target_port *target;
3628 srp_dev = client_data;
3632 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3633 device_unregister(&host->dev);
3635 * Wait for the sysfs entry to go away, so that no new
3636 * target ports can be created.
3638 wait_for_completion(&host->released);
3641 * Remove all target ports.
3643 spin_lock(&host->target_lock);
3644 list_for_each_entry(target, &host->target_list, list)
3645 srp_queue_remove_work(target);
3646 spin_unlock(&host->target_lock);
3649 * srp_queue_remove_work() queues a call to
3650 * srp_remove_target(). The latter function cancels
3651 * target->tl_err_work so waiting for the remove works to
3652 * finish is sufficient.
3654 flush_workqueue(srp_remove_wq);
3659 ib_dealloc_pd(srp_dev->pd);
/* Callbacks and tunables exported to the SRP transport class. */
3664 static struct srp_function_template ib_srp_transport_functions = {
3665 .has_rport_state = true,
3666 .reset_timer_if_blocked = true,
3667 .reconnect_delay = &srp_reconnect_delay,
3668 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3669 .dev_loss_tmo = &srp_dev_loss_tmo,
3670 .reconnect = srp_rport_reconnect,
3671 .rport_delete = srp_rport_delete,
3672 .terminate_rport_io = srp_terminate_io,
/*
 * srp_init_module() - module entry point.
 *
 * Validates/clamps the module parameters (cmd_sg_entries at most 255,
 * indirect_sg_entries at least cmd_sg_entries and at most
 * SG_MAX_SEGMENTS), creates the removal workqueue, registers the SRP
 * transport template, the sysfs class, the SA client and the IB client.
 * Error paths unwind the registrations in reverse order.
 */
3675 static int __init srp_init_module(void)
3679 if (srp_sg_tablesize) {
3680 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3681 if (!cmd_sg_entries)
3682 cmd_sg_entries = srp_sg_tablesize;
3685 if (!cmd_sg_entries)
3686 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3688 if (cmd_sg_entries > 255) {
3689 pr_warn("Clamping cmd_sg_entries to 255\n");
3690 cmd_sg_entries = 255;
3693 if (!indirect_sg_entries)
3694 indirect_sg_entries = cmd_sg_entries;
3695 else if (indirect_sg_entries < cmd_sg_entries) {
3696 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3698 indirect_sg_entries = cmd_sg_entries;
3701 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3702 pr_warn("Clamping indirect_sg_entries to %u\n",
3704 indirect_sg_entries = SG_MAX_SEGMENTS;
3707 srp_remove_wq = create_workqueue("srp_remove");
3708 if (!srp_remove_wq) {
3714 ib_srp_transport_template =
3715 srp_attach_transport(&ib_srp_transport_functions);
3716 if (!ib_srp_transport_template)
3719 ret = class_register(&srp_class);
3721 pr_err("couldn't register class infiniband_srp\n");
3725 ib_sa_register_client(&srp_sa_client);
3727 ret = ib_register_client(&srp_client);
3729 pr_err("couldn't register IB client\n");
/* Error unwinding (reverse order of the registrations above). */
3737 ib_sa_unregister_client(&srp_sa_client);
3738 class_unregister(&srp_class);
3741 srp_release_transport(ib_srp_transport_template);
3744 destroy_workqueue(srp_remove_wq);
/* Module exit: unregister everything srp_init_module() registered, in
 * reverse order. */
3748 static void __exit srp_cleanup_module(void)
3750 ib_unregister_client(&srp_client);
3751 ib_sa_unregister_client(&srp_sa_client);
3752 class_unregister(&srp_class);
3753 srp_release_transport(ib_srp_transport_template);
3754 destroy_workqueue(srp_remove_wq);
/* Module entry/exit registration. */
3757 module_init(srp_init_module);
3758 module_exit(srp_cleanup_module);