/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/scsi_transport_srp.h>

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif
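/*
 * With dynamic debug disabled, the two stubs above compile the dynamic-debug
 * metadata away so that optional checks guarded by DYNAMIC_DEBUG_BRANCH(),
 * such as the memory mapping consistency check in srp_map_data(), drop out
 * entirely.
 */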
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");
static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate against transport layer errors. After this time"
		 " has been exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");
static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname);
static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
			     const struct ib_cm_event *event);
static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
			       struct rdma_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;
static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;
static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}
static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}
static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}
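/*
 * The first three bytes of an IEEE-format GUID hold the vendor OUI, so
 * comparing the start of the IOC GUID against the Topspin and Cisco OUIs
 * below identifies targets that need the workarounds enabled by the
 * topspin_workarounds module parameter.
 */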
static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}
static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}
static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);
}
static int srp_init_ib_qp(struct srp_target_port *target,
			  struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->ib_cm.pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}
static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_ib_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->ib_cm.cm_id)
		ib_destroy_cm_id(ch->ib_cm.cm_id);
	ch->ib_cm.cm_id = new_cm_id;
	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
			    target->srp_host->port))
		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
	else
		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
	ch->ib_cm.path.sgid = target->sgid;
	ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
	ch->ib_cm.path.pkey = target->ib_cm.pkey;
	ch->ib_cm.path.service_id = target->ib_cm.service_id;

	return 0;
}
static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct rdma_cm_id *new_cm_id;
	int ret;

	new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
				   RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(new_cm_id)) {
		ret = PTR_ERR(new_cm_id);
		new_cm_id = NULL;
		goto out;
	}

	init_completion(&ch->done);
	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
				(struct sockaddr *)&target->rdma_cm.src : NULL,
				(struct sockaddr *)&target->rdma_cm.dst,
				SRP_PATH_REC_TIMEOUT_MS);
	if (ret) {
		pr_err("No route available from %pIS to %pIS (%d)\n",
		       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
		goto out;
	}
	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		goto out;

	ret = ch->status;
	if (ret) {
		pr_err("Resolving address %pIS failed (%d)\n",
		       &target->rdma_cm.dst, ret);
		goto out;
	}

	swap(ch->rdma_cm.cm_id, new_cm_id);

out:
	if (new_cm_id)
		rdma_destroy_id(new_cm_id);

	return ret;
}
static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;

	return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
		srp_new_ib_cm_id(ch);
}
static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->mr_pool_size;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}
/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}
/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	int i, ret = -EINVAL;
	enum ib_mr_type mr_type;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		mr_type = IB_MR_TYPE_SG_GAPS;
	else
		mr_type = IB_MR_TYPE_MEM_REG;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			if (ret == -ENOMEM)
				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
					dev_name(&device->dev));
			goto destroy_pool;
		}
		d->mr = mr;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);
	pool = ERR_PTR(ret);
	goto out;
}
/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}
/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}
static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
				  dev->max_pages_per_mr);
}
/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Drain the qp before destroying it. This prevents the receive completion
 * handler from accessing the queue pair while it is being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
	spin_lock_irq(&ch->lock);
	ib_process_cq_direct(ch->send_cq, -1);
	spin_unlock_irq(&ch->lock);

	ib_drain_qp(ch->qp);
	ib_destroy_qp(ch->qp);
}
static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
	int ret;
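
	/*
	 * Send queue sizing: besides the SEND itself, each request may post
	 * up to mr_per_cmd IB_WR_REG_MR and mr_per_cmd IB_WR_LOCAL_INV work
	 * requests when fast registration is used, hence the multiplier
	 * m = 1 + 2 * mr_per_cmd above.
	 */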
	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	if (!init_attr)
		return -ENOMEM;

	/* queue_size + 1 for ib_drain_rq() */
	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
			      ch->comp_vector, IB_POLL_SOFTIRQ);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
		goto err;
	}

	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
			      ch->comp_vector, IB_POLL_DIRECT);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
		goto err_recv_cq;
	}

	init_attr->event_handler       = srp_qp_event;
	init_attr->cap.max_send_wr     = m * target->queue_size;
	init_attr->cap.max_recv_wr     = target->queue_size + 1;
	init_attr->cap.max_recv_sge    = 1;
	init_attr->cap.max_send_sge    = 1;
	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
	init_attr->qp_type             = IB_QPT_RC;
	init_attr->send_cq             = send_cq;
	init_attr->recv_cq             = recv_cq;

	if (target->using_rdma_cm) {
		ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
		qp = ch->rdma_cm.cm_id->qp;
	} else {
		qp = ib_create_qp(dev->pd, init_attr);
		if (!IS_ERR(qp)) {
			ret = srp_init_ib_qp(target, qp);
			if (ret)
				ib_destroy_qp(qp);
		} else {
			ret = PTR_ERR(qp);
		}
	}
	if (ret) {
		pr_err("QP creation failed for dev %s: %d\n",
		       dev_name(&dev->dev->dev), ret);
		goto err_send_cq;
	}

	if (dev->use_fast_reg) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	} else if (dev->use_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	}

	if (ch->qp)
		srp_destroy_qp(ch);
	if (ch->recv_cq)
		ib_free_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_free_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	kfree(init_attr);
	return 0;

err_qp:
	if (target->using_rdma_cm)
		rdma_destroy_qp(ch->rdma_cm.cm_id);
	else
		ib_destroy_qp(qp);

err_send_cq:
	ib_free_cq(send_cq);

err_recv_cq:
	ib_free_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}
/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (target->using_rdma_cm) {
		if (ch->rdma_cm.cm_id) {
			rdma_destroy_id(ch->rdma_cm.cm_id);
			ch->rdma_cm.cm_id = NULL;
		}
	} else {
		if (ch->ib_cm.cm_id) {
			ib_destroy_cm_id(ch->ib_cm.cm_id);
			ch->ib_cm.cm_id = NULL;
		}
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}

	srp_destroy_qp(ch);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);

	/*
	 * Prevent the SCSI error handler from trying to use this channel
	 * after it has been freed. The SCSI error handler can continue
	 * attempting recovery actions after scsi_remove_host() has returned.
	 */
	ch->target = NULL;

	ch->qp = NULL;
	ch->send_cq = ch->recv_cq = NULL;

	if (ch->rx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->rx_ring[i]);
		kfree(ch->rx_ring);
		ch->rx_ring = NULL;
	}
	if (ch->tx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->tx_ring[i]);
		kfree(ch->tx_ring);
		ch->tx_ring = NULL;
	}
}
static void srp_path_rec_completion(int status,
				    struct sa_path_rec *pathrec,
				    void *ch_ptr)
{
	struct srp_rdma_ch *ch = ch_ptr;
	struct srp_target_port *target = ch->target;

	ch->status = status;
	if (status)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Got failed path rec status %d\n", status);
	else
		ch->ib_cm.path = *pathrec;
	complete(&ch->done);
}
static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	ch->ib_cm.path.numb_path = 1;

	init_completion(&ch->done);

	ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       &ch->ib_cm.path,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID |
					       IB_SA_PATH_REC_SGID |
					       IB_SA_PATH_REC_NUMB_PATH |
					       IB_SA_PATH_REC_PKEY,
					       SRP_PATH_REC_TIMEOUT_MS,
					       GFP_KERNEL,
					       srp_path_rec_completion,
					       ch, &ch->ib_cm.path_query);
	if (ch->ib_cm.path_query_id < 0)
		return ch->ib_cm.path_query_id;

	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;

	if (ch->status < 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
			     ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
			     be16_to_cpu(target->ib_cm.pkey),
			     be64_to_cpu(target->ib_cm.service_id));

	return ch->status;
}
static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	init_completion(&ch->done);

	ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
	if (ret)
		return ret;

	wait_for_completion_interruptible(&ch->done);

	if (ch->status != 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path resolution failed\n");

	return ch->status;
}
static int srp_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;

	return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
		srp_ib_lookup_path(ch);
}
static u8 srp_get_subnet_timeout(struct srp_host *host)
{
	struct ib_port_attr attr;
	int ret;
	u8 subnet_timeout = 18;

	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
	if (ret == 0)
		subnet_timeout = attr.subnet_timeout;

	if (unlikely(subnet_timeout < 15))
		pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
			dev_name(&host->srp_dev->dev->dev), subnet_timeout);

	return subnet_timeout;
}
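
/*
 * Note: per the IBTA spec, both the subnet timeout and the CM response
 * timeouts are encoded as 4.096 us * 2^value. srp_send_req() below uses
 * subnet_timeout + 2 for the CM response timeouts, i.e. roughly four times
 * the worst-case subnet round-trip time.
 */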
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	struct {
		struct rdma_conn_param	  rdma_param;
		struct srp_login_req_rdma rdma_req;
		struct ib_cm_req_param	  ib_param;
		struct srp_login_req	  ib_req;
	} *req = NULL;
	char *ipi, *tpi;
	int status;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->ib_param.flow_control = 1;
	req->ib_param.retry_count = target->tl_retry_count;

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->ib_param.responder_resources = 4;
	req->ib_param.rnr_retry_count = 7;
	req->ib_param.max_cm_retries = 15;

	req->ib_req.opcode = SRP_LOGIN_REQ;
	req->ib_req.tag = 0;
	req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					      SRP_BUF_FORMAT_INDIRECT);
	req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
				 SRP_MULTICHAN_SINGLE);

	if (target->using_rdma_cm) {
		req->rdma_param.flow_control = req->ib_param.flow_control;
		req->rdma_param.responder_resources =
			req->ib_param.responder_resources;
		req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
		req->rdma_param.retry_count = req->ib_param.retry_count;
		req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
		req->rdma_param.private_data = &req->rdma_req;
		req->rdma_param.private_data_len = sizeof(req->rdma_req);

		req->rdma_req.opcode = req->ib_req.opcode;
		req->rdma_req.tag = req->ib_req.tag;
		req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
		req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
		req->rdma_req.req_flags	= req->ib_req.req_flags;

		ipi = req->rdma_req.initiator_port_id;
		tpi = req->rdma_req.target_port_id;
	} else {
		u8 subnet_timeout;

		subnet_timeout = srp_get_subnet_timeout(target->srp_host);

		req->ib_param.primary_path = &ch->ib_cm.path;
		req->ib_param.alternate_path = NULL;
		req->ib_param.service_id = target->ib_cm.service_id;
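		/*
		 * Use a random starting PSN; the PSN field is 24 bits wide,
		 * hence the masking below.
		 */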
		get_random_bytes(&req->ib_param.starting_psn, 4);
		req->ib_param.starting_psn &= 0xffffff;
		req->ib_param.qp_num = ch->qp->qp_num;
		req->ib_param.qp_type = ch->qp->qp_type;
		req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
		req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
		req->ib_param.private_data = &req->ib_req;
		req->ib_param.private_data_len = sizeof(req->ib_req);

		ipi = req->ib_req.initiator_port_id;
		tpi = req->ib_req.target_port_id;
	}

	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID. Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(ipi,     &target->sgid.global.interface_id, 8);
		memcpy(ipi + 8, &target->initiator_ext, 8);
		memcpy(tpi,     &target->ioc_guid, 8);
		memcpy(tpi + 8, &target->id_ext, 8);
	} else {
		memcpy(ipi,     &target->initiator_ext, 8);
		memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
		memcpy(tpi,     &target->id_ext, 8);
		memcpy(tpi + 8, &target->ioc_guid, 8);
	}
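
	/*
	 * Resulting 16-byte port identifier layout (SRP draft rev. 16a):
	 * bytes 0..7 hold the ID extension and bytes 8..15 the GUID; the
	 * two halves are swapped for SRP_REV10_IB_IO_CLASS targets.
	 */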
	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     be64_to_cpu(target->ioc_guid));
		memset(ipi, 0, 8);
		memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
	}

	if (target->using_rdma_cm)
		status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
	else
		status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);

	kfree(req);

	return status;
}
static bool srp_queue_remove_work(struct srp_target_port *target)
{
	bool changed = false;

	spin_lock_irq(&target->lock);
	if (target->state != SRP_TARGET_REMOVED) {
		target->state = SRP_TARGET_REMOVED;
		changed = true;
	}
	spin_unlock_irq(&target->lock);

	if (changed)
		queue_work(srp_remove_wq, &target->remove_work);

	return changed;
}
static void srp_disconnect_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i, ret;

	/* XXX should send SRP_I_LOGOUT request */

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ch->connected = false;
		ret = 0;
		if (target->using_rdma_cm) {
			if (ch->rdma_cm.cm_id)
				rdma_disconnect(ch->rdma_cm.cm_id);
		} else {
			if (ch->ib_cm.cm_id)
				ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
						      NULL, 0);
		}
		if (ret < 0) {
			shost_printk(KERN_DEBUG, target->scsi_host,
				     PFX "Sending CM DREQ failed\n");
		}
	}
}
static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	int i;

	if (!ch->req_ring)
		return;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg) {
			kfree(req->fr_list);
		} else {
			kfree(req->fmr_list);
			kfree(req->map_page);
		}
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
					    DMA_TO_DEVICE);
		}
		kfree(req->indirect_desc);
	}

	kfree(ch->req_ring);
	ch->req_ring = NULL;
}
static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	void *mr_list;
	dma_addr_t dma_addr;
	int i, ret = -ENOMEM;

	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
			       GFP_KERNEL);
	if (!ch->req_ring)
		goto out;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
					GFP_KERNEL);
		if (!mr_list)
			goto out;
		if (srp_dev->use_fast_reg) {
			req->fr_list = mr_list;
		} else {
			req->fmr_list = mr_list;
			req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
						      sizeof(void *),
						      GFP_KERNEL);
			if (!req->map_page)
				goto out;
		}
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
			goto out;

		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
					     DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, dma_addr))
			goto out;

		req->indirect_dma_addr = dma_addr;
	}
	ret = 0;

out:
	return ret;
}
/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template that did not exist before
 * this function was invoked are ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}
static void srp_remove_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_del_scsi_host_attr(target->scsi_host);
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_req_data(target, ch);
	}
	kfree(target->ch);
	target->ch = NULL;

	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);

	scsi_host_put(target->scsi_host);
}
static void srp_remove_work(struct work_struct *work)
{
	struct srp_target_port *target =
		container_of(work, struct srp_target_port, remove_work);

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;

	srp_queue_remove_work(target);
}
/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
	int i, c = 0;

	for (i = 0; i < target->ch_count; i++)
		c += target->ch[i].connected;

	return c;
}
static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	int ret;

	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

	ret = srp_lookup_path(ch);
	if (ret)
		goto out;

	while (1) {
		init_completion(&ch->done);
		ret = srp_send_req(ch, multich);
		if (ret)
			goto out;
		ret = wait_for_completion_interruptible(&ch->done);
		if (ret < 0)
			goto out;

		/*
		 * The CM event handling code will set status to
		 * SRP_PORT_REDIRECT if we get a port redirect REJ
		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
		 * redirect REJ back.
		 */
		ret = ch->status;
		switch (ret) {
		case 0:
			ch->connected = true;
			goto out;

		case SRP_PORT_REDIRECT:
			ret = srp_lookup_path(ch);
			if (ret)
				goto out;
			break;

		case SRP_DLID_REDIRECT:
			break;

		case SRP_STALE_CONN:
			shost_printk(KERN_ERR, target->scsi_host, PFX
				     "giving up on stale connection\n");
			ret = -ECONNRESET;
			goto out;

		default:
			goto out;
		}
	}

out:
	return ret <= 0 ? ret : -ENODEV;
}
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}
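
/*
 * Post an IB_WR_LOCAL_INV work request that invalidates the given rkey so
 * that the HCA blocks further remote access through it before the fast
 * registration descriptor is returned to the pool.
 */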
static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
			u32 rkey)
{
	struct ib_send_wr wr = {
		.opcode		    = IB_WR_LOCAL_INV,
		.next		    = NULL,
		.num_sge	    = 0,
		.send_flags	    = 0,
		.ex.invalidate_rkey = rkey,
	};

	wr.wr_cqe = &req->reg_cqe;
	req->reg_cqe.done = srp_inv_rkey_err_done;
	return ib_post_send(ch->qp, &wr, NULL);
}
static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	int i, res;

	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
		return;

	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;

		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
			if (res < 0) {
				shost_printk(KERN_ERR, target->scsi_host, PFX
					     "Queueing INV WR for rkey %#x failed (%d)\n",
					     (*pfr)->mr->rkey, res);
				queue_work(system_long_wq,
					   &target->tl_err_work);
			}
		}
		if (req->nmdesc)
			srp_fr_pool_put(ch->fr_pool, req->fr_list,
					req->nmdesc);
	} else if (dev->use_fmr) {
		struct ib_pool_fmr **pfmr;

		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	}

	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
}
/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}
/**
 * srp_free_req() - Unmap data and adjust ch->req_lim.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}
static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}
static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	int i, j;

	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}
/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to realize that is not to call this function
 * directly but to call srp_reconnect_rport() instead since that last function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j, ret = 0;
	bool multich = false;

	srp_disconnect_target(target);

	if (target->state == SRP_TARGET_SCANNING)
		return -ENODEV;

	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret += srp_new_cm_id(ch);
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL, DID_RESET << 16);
		}
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		/*
		 * Whether or not creating a new CM ID succeeded, create a new
		 * QP. This guarantees that all completion callback function
		 * invocations have finished before request resetting starts.
		 */
		ret += srp_create_ch_ib(ch);

		INIT_LIST_HEAD(&ch->free_tx);
		for (j = 0; j < target->queue_size; ++j)
			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
	}

	target->qp_in_error = false;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		if (ret)
			break;
		ret = srp_connect_ch(ch, multich);
		multich = true;
	}

	if (ret == 0)
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "reconnect succeeded\n");

	return ret;
}
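
/*
 * Append one srp_direct_buf descriptor for [dma_addr, dma_addr + dma_len)
 * to the descriptor table being built up in @state and account for its
 * length.
 */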
static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
			 unsigned int dma_len, u32 rkey)
{
	struct srp_direct_buf *desc = state->desc;

	WARN_ON_ONCE(!dma_len);

	desc->va = cpu_to_be64(dma_addr);
	desc->key = cpu_to_be32(rkey);
	desc->len = cpu_to_be32(dma_len);

	state->total_len += dma_len;
	state->desc++;
	state->ndesc++;
}
static int srp_map_finish_fmr(struct srp_map_state *state,
			      struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pool_fmr *fmr;
	u64 io_addr = 0;

	if (state->fmr.next >= state->fmr.end) {
		shost_printk(KERN_ERR, ch->target->scsi_host,
			     PFX "Out of MRs (mr_per_cmd = %d)\n",
			     ch->target->mr_per_cmd);
		return -ENOMEM;
	}

	WARN_ON_ONCE(!dev->use_fmr);

	if (state->npages == 0)
		return 0;

	if (state->npages == 1 && target->global_rkey) {
		srp_map_desc(state, state->base_dma_addr, state->dma_len,
			     target->global_rkey);
		goto reset_state;
	}

	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
				   state->npages, io_addr);
	if (IS_ERR(fmr))
		return PTR_ERR(fmr);

	*state->fmr.next++ = fmr;
	state->nmdesc++;

	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
		     state->dma_len, fmr->fmr->rkey);

reset_state:
	state->npages = 0;
	state->dma_len = 0;

	return 0;
}
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
/*
 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
 * where to start in the first element. If sg_offset_p != NULL then
 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
 * byte that has not yet been mapped.
 */
static int srp_map_finish_fr(struct srp_map_state *state,
			     struct srp_request *req,
			     struct srp_rdma_ch *ch, int sg_nents,
			     unsigned int *sg_offset_p)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_reg_wr wr;
	struct srp_fr_desc *desc;
	u32 rkey;
	int n, err;

	if (state->fr.next >= state->fr.end) {
		shost_printk(KERN_ERR, ch->target->scsi_host,
			     PFX "Out of MRs (mr_per_cmd = %d)\n",
			     ch->target->mr_per_cmd);
		return -ENOMEM;
	}

	WARN_ON_ONCE(!dev->use_fast_reg);

	if (sg_nents == 1 && target->global_rkey) {
		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;

		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
			     sg_dma_len(state->sg) - sg_offset,
			     target->global_rkey);
		if (sg_offset_p)
			*sg_offset_p = 0;
		return 1;
	}

	desc = srp_fr_pool_get(ch->fr_pool);
	if (!desc)
		return -ENOMEM;

	rkey = ib_inc_rkey(desc->mr->rkey);
	ib_update_fast_reg_key(desc->mr, rkey);

	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
			 dev->mr_page_size);
	if (unlikely(n < 0)) {
		srp_fr_pool_put(ch->fr_pool, &desc, 1);
		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
			 sg_offset_p ? *sg_offset_p : -1, n);
		return n;
	}

	WARN_ON_ONCE(desc->mr->length == 0);

	req->reg_cqe.done = srp_reg_mr_err_done;

	wr.wr.next = NULL;
	wr.wr.opcode = IB_WR_REG_MR;
	wr.wr.wr_cqe = &req->reg_cqe;
	wr.wr.num_sge = 0;
	wr.wr.send_flags = 0;
	wr.mr = desc->mr;
	wr.key = desc->mr->rkey;
	wr.access = (IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE);

	*state->fr.next++ = desc;
	state->nmdesc++;

	srp_map_desc(state, desc->mr->iova,
		     desc->mr->length, desc->mr->rkey);

	err = ib_post_send(ch->qp, &wr.wr, NULL);
	if (unlikely(err)) {
		WARN_ON_ONCE(err == -ENOMEM);
		return err;
	}

	return n;
}
static int srp_map_sg_entry(struct srp_map_state *state,
			    struct srp_rdma_ch *ch,
			    struct scatterlist *sg)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
	unsigned int len = 0;
	int ret;

	WARN_ON_ONCE(!dma_len);

	while (dma_len) {
		unsigned offset = dma_addr & ~dev->mr_page_mask;

		if (state->npages == dev->max_pages_per_mr ||
		    (state->npages > 0 && offset != 0)) {
			ret = srp_map_finish_fmr(state, ch);
			if (ret)
				return ret;
		}

		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);

		if (!state->npages)
			state->base_dma_addr = dma_addr;
		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
		state->dma_len += len;
		dma_addr += len;
		dma_len -= len;
	}

	/*
	 * If the end of the MR is not on a page boundary then we need to
	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
	 */
	ret = 0;
	if ((dma_addr & ~dev->mr_page_mask) != 0)
		ret = srp_map_finish_fmr(state, ch);
	return ret;
}
static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct scatterlist *sg;
	int i, ret;

	state->pages = req->map_page;
	state->fmr.next = req->fmr_list;
	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;

	for_each_sg(scat, sg, count, i) {
		ret = srp_map_sg_entry(state, ch, sg);
		if (ret)
			return ret;
	}

	ret = srp_map_finish_fmr(state, ch);
	if (ret)
		return ret;

	return 0;
}
static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			 struct srp_request *req, struct scatterlist *scat,
			 int count)
{
	unsigned int sg_offset = 0;

	state->fr.next = req->fr_list;
	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
	state->sg = scat;

	if (count == 0)
		return 0;

	while (count) {
		int i, n;

		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
		if (unlikely(n < 0))
			return n;

		count -= n;
		for (i = 0; i < n; i++)
			state->sg = sg_next(state->sg);
	}

	return 0;
}
static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct scatterlist *sg;
	int i;

	for_each_sg(scat, sg, count, i) {
		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
			     ib_sg_dma_len(dev->dev, sg),
			     target->global_rkey);
	}

	return 0;
}
/*
 * Register the indirect data buffer descriptor with the HCA.
 *
 * Note: since the indirect data buffer descriptor has been allocated with
 * kmalloc() it is guaranteed that this buffer is a physically contiguous
 * memory region.
 */
static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
		       void **next_mr, void **end_mr, u32 idb_len,
		       __be32 *idb_rkey)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct srp_map_state state;
	struct srp_direct_buf idb_desc;
	u64 idb_pages[1];
	struct scatterlist idb_sg[1];
	int ret;

	memset(&state, 0, sizeof(state));
	memset(&idb_desc, 0, sizeof(idb_desc));
	state.gen.next = next_mr;
	state.gen.end = end_mr;
	state.desc = &idb_desc;
	state.base_dma_addr = req->indirect_dma_addr;
	state.dma_len = idb_len;

	if (dev->use_fast_reg) {
		state.sg = idb_sg;
		sg_init_one(idb_sg, req->indirect_desc, idb_len);
		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
#endif
		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
		if (ret < 0)
			return ret;
		WARN_ON_ONCE(ret < 1);
	} else if (dev->use_fmr) {
		state.pages = idb_pages;
		state.pages[0] = (req->indirect_dma_addr &
				  dev->mr_page_mask);
		state.npages = 1;
		ret = srp_map_finish_fmr(&state, ch);
		if (ret < 0)
			return ret;
	} else {
		return -EINVAL;
	}

	*idb_rkey = idb_desc.key;

	return 0;
}
static void srp_check_mapping(struct srp_map_state *state,
			      struct srp_rdma_ch *ch, struct srp_request *req,
			      struct scatterlist *scat, int count)
{
	struct srp_device *dev = ch->target->srp_host->srp_dev;
	struct srp_fr_desc **pfr;
	u64 desc_len = 0, mr_len = 0;
	int i;

	for (i = 0; i < state->ndesc; i++)
		desc_len += be32_to_cpu(req->indirect_desc[i].len);
	if (dev->use_fast_reg)
		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
			mr_len += (*pfr)->mr->length;
	else if (dev->use_fmr)
		for (i = 0; i < state->nmdesc; i++)
			mr_len += be32_to_cpu(req->indirect_desc[i].len);
	if (desc_len != scsi_bufflen(req->scmnd) ||
	    mr_len > scsi_bufflen(req->scmnd))
		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
		       scsi_bufflen(req->scmnd), desc_len, mr_len,
		       state->ndesc, state->nmdesc);
}
/**
 * srp_map_data() - map SCSI data buffer onto an SRP request
 * @scmnd: SCSI command to map
 * @ch: SRP RDMA channel
 * @req: SRP request
 *
 * Returns the length in bytes of the SRP_CMD IU or a negative value if
 * mapping failed.
 */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
			struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct scatterlist *scat;
	struct srp_cmd *cmd = req->cmd->buf;
	int len, nents, count, ret;
	struct srp_device *dev;
	struct ib_device *ibdev;
	struct srp_map_state state;
	struct srp_indirect_buf *indirect_hdr;
	u32 idb_len, table_len;
	__be32 idb_rkey;
	u8 fmt;

	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
		return sizeof (struct srp_cmd);

	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled data direction %d\n",
			     scmnd->sc_data_direction);
		return -EINVAL;
	}

	nents = scsi_sg_count(scmnd);
	scat  = scsi_sglist(scmnd);

	dev = target->srp_host->srp_dev;
	ibdev = dev->dev;

	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
	if (unlikely(count == 0))
		return -EIO;

	fmt = SRP_DATA_DESC_DIRECT;
	len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);

	if (count == 1 && target->global_rkey) {
		/*
		 * The midlayer only generated a single gather/scatter
		 * entry, or DMA mapping coalesced everything to a
		 * single entry. So a direct descriptor along with
		 * the DMA MR suffices.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
		buf->key = cpu_to_be32(target->global_rkey);
		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));

		req->nmdesc = 0;
		goto map_complete;
	}

	/*
	 * We have more than one scatter/gather entry, so build our indirect
	 * descriptor table, trying to merge as many entries as we can.
	 */
	indirect_hdr = (void *) cmd->add_data;

	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
				   target->indirect_size, DMA_TO_DEVICE);

	memset(&state, 0, sizeof(state));
	state.desc = req->indirect_desc;
	if (dev->use_fast_reg)
		ret = srp_map_sg_fr(&state, ch, req, scat, count);
	else if (dev->use_fmr)
		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
	else
		ret = srp_map_sg_dma(&state, ch, req, scat, count);
	req->nmdesc = state.nmdesc;
	if (ret < 0)
		goto unmap;

	{
		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
			"Memory mapping consistency check");
		if (DYNAMIC_DEBUG_BRANCH(ddm))
			srp_check_mapping(&state, ch, req, scat, count);
	}

	/* We've mapped the request, now pull as much of the indirect
	 * descriptor table as we can into the command buffer. If this
	 * target is not using an external indirect table, we are
	 * guaranteed to fit into the command, as the SCSI layer won't
	 * give us more S/G entries than we allow.
	 */
	if (state.ndesc == 1) {
		/*
		 * Memory registration collapsed the sg-list into one entry,
		 * so use a direct descriptor.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		*buf = req->indirect_desc[0];
		goto map_complete;
	}

	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
		     !target->allow_ext_sg)) {
		shost_printk(KERN_ERR, target->scsi_host,
			     "Could not fit S/G list into SRP_CMD\n");
		ret = -EIO;
		goto unmap;
	}

	count = min(state.ndesc, target->cmd_sg_cnt);
	table_len = state.ndesc * sizeof (struct srp_direct_buf);
	idb_len = sizeof(struct srp_indirect_buf) + table_len;

	fmt = SRP_DATA_DESC_INDIRECT;
	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
	len += count * sizeof (struct srp_direct_buf);

	memcpy(indirect_hdr->desc_list, req->indirect_desc,
	       count * sizeof (struct srp_direct_buf));

	if (!target->global_rkey) {
		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
				  idb_len, &idb_rkey);
		if (ret < 0)
			goto unmap;
		req->nmdesc++;
	} else {
		idb_rkey = cpu_to_be32(target->global_rkey);
	}

	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
	indirect_hdr->table_desc.key = idb_rkey;
	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
	indirect_hdr->len = cpu_to_be32(state.total_len);

	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->data_out_desc_cnt = count;
	else
		cmd->data_in_desc_cnt = count;

	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
				      DMA_TO_DEVICE);

map_complete:
	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->buf_fmt = fmt << 4;
	else
		cmd->buf_fmt = fmt;

	return len;

unmap:
	srp_unmap_data(scmnd, ch, req);
	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
		ret = -E2BIG;
	return ret;
}
/*
 * Return an IU and possible credit to the free pool
 */
static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
			  enum srp_iu_type iu_type)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	list_add(&iu->list, &ch->free_tx);
	if (iu_type != SRP_IU_RSP)
		++ch->req_lim;
	spin_unlock_irqrestore(&ch->lock, flags);
}
/*
 * Must be called with ch->lock held to protect req_lim and free_tx.
 * If IU is not sent, it must be returned using srp_put_tx_iu().
 *
 * Note:
 * An upper limit for the number of allocated information units for each
 * request type is:
 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
 *   more than Scsi_Host.can_queue requests.
 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
 *   one unanswered SRP request to an initiator.
 */
static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
				      enum srp_iu_type iu_type)
{
	struct srp_target_port *target = ch->target;
	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
	struct srp_iu *iu;

	lockdep_assert_held(&ch->lock);

	ib_process_cq_direct(ch->send_cq, -1);

	if (list_empty(&ch->free_tx))
		return NULL;

	/* Initiator responses to target requests do not consume credits */
	if (iu_type != SRP_IU_RSP) {
		if (ch->req_lim <= rsv) {
			++target->zero_req_lim;
			return NULL;
		}

		--ch->req_lim;
	}

	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
	list_del(&iu->list);
	return iu;
}
/*
 * Note: if this function is called from inside ib_drain_sq() then it will
 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
 * with status IB_WC_SUCCESS then that's a bug.
 */
static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "SEND");
		return;
	}

	lockdep_assert_held(&ch->lock);

	list_add(&iu->list, &ch->free_tx);
}
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_sge list;
	struct ib_send_wr wr;

	list.addr   = iu->dma;
	list.length = len;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_send_done;

	wr.next       = NULL;
	wr.wr_cqe     = &iu->cqe;
	wr.sg_list    = &list;
	wr.num_sge    = 1;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(ch->qp, &wr, NULL);
}
static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
{
	struct srp_target_port *target = ch->target;
	struct ib_recv_wr wr;
	struct ib_sge list;

	list.addr   = iu->dma;
	list.length = iu->size;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_recv_done;

	wr.next     = NULL;
	wr.wr_cqe   = &iu->cqe;
	wr.sg_list  = &list;
	wr.num_sge  = 1;

	return ib_post_recv(ch->qp, &wr, NULL);
}
static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
{
	struct srp_target_port *target = ch->target;
	struct srp_request *req;
	struct scsi_cmnd *scmnd;
	unsigned long flags;

	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
		spin_lock_irqsave(&ch->lock, flags);
		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
		if (rsp->tag == ch->tsk_mgmt_tag) {
			ch->tsk_mgmt_status = -1;
			if (be32_to_cpu(rsp->resp_data_len) >= 4)
				ch->tsk_mgmt_status = rsp->data[3];
			complete(&ch->tsk_mgmt_done);
		} else {
			shost_printk(KERN_ERR, target->scsi_host,
				     "Received tsk mgmt response too late for tag %#llx\n",
				     rsp->tag);
		}
		spin_unlock_irqrestore(&ch->lock, flags);
	} else {
		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
		if (scmnd && scmnd->host_scribble) {
			req = (void *)scmnd->host_scribble;
			scmnd = srp_claim_req(ch, req, NULL, scmnd);
		} else {
			scmnd = NULL;
		}
		if (!scmnd) {
			shost_printk(KERN_ERR, target->scsi_host,
				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
				     rsp->tag, ch - target->ch, ch->qp->qp_num);

			spin_lock_irqsave(&ch->lock, flags);
			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
			spin_unlock_irqrestore(&ch->lock, flags);

			return;
		}
		scmnd->result = rsp->status;

		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
			memcpy(scmnd->sense_buffer, rsp->data +
			       be32_to_cpu(rsp->resp_data_len),
			       min_t(int, be32_to_cpu(rsp->sense_data_len),
				     SCSI_SENSE_BUFFERSIZE));
		}

		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));

		srp_free_req(ch, req, scmnd,
			     be32_to_cpu(rsp->req_lim_delta));

		scmnd->host_scribble = NULL;
		scmnd->scsi_done(scmnd);
	}
}
static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
			       void *rsp, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	unsigned long flags;
	struct srp_iu *iu;
	int err;

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_delta;
	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
	spin_unlock_irqrestore(&ch->lock, flags);

	if (!iu) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "no IU available to send response\n");
		return 1;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
	memcpy(iu->buf, rsp, len);
	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);

	err = srp_post_send(ch, iu, len);
	if (err) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "unable to post response: %d\n", err);
		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
	}

	return err;
}
static void srp_process_cred_req(struct srp_rdma_ch *ch,
				 struct srp_cred_req *req)
{
	struct srp_cred_rsp rsp = {
		.opcode = SRP_CRED_RSP,
		.tag = req->tag,
	};
	s32 delta = be32_to_cpu(req->req_lim_delta);

	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
			     "problems processing SRP_CRED_REQ\n");
}
static void srp_process_aer_req(struct srp_rdma_ch *ch,
				struct srp_aer_req *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_aer_rsp rsp = {
		.opcode = SRP_AER_RSP,
		.tag = req->tag,
	};
	s32 delta = be32_to_cpu(req->req_lim_delta);

	shost_printk(KERN_ERR, target->scsi_host, PFX
		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));

	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "problems processing SRP_AER_REQ\n");
}
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	int res;
	u8 opcode;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "RECV");
		return;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
				   DMA_FROM_DEVICE);

	opcode = *(u8 *) iu->buf;

	if (0) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "recv completion, opcode 0x%02x\n", opcode);
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
			       iu->buf, wc->byte_len, true);
	}

	switch (opcode) {
	case SRP_RSP:
		srp_process_rsp(ch, iu->buf);
		break;

	case SRP_CRED_REQ:
		srp_process_cred_req(ch, iu->buf);
		break;

	case SRP_AER_REQ:
		srp_process_aer_req(ch, iu->buf);
		break;

	case SRP_T_LOGOUT:
		/* XXX Handle target logout */
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Got target logout request\n");
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
		break;
	}

	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
				      DMA_FROM_DEVICE);

	res = srp_post_recv(ch, iu);
	if (res != 0)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Recv failed with error code %d\n", res);
}
/**
 * srp_tl_err_work() - handle a transport layer error
 * @work: Work structure embedded in an SRP target port.
 *
 * Note: This function may get invoked before the rport has been created,
 * hence the target->rport test.
 */
static void srp_tl_err_work(struct work_struct *work)
{
	struct srp_target_port *target;

	target = container_of(work, struct srp_target_port, tl_err_work);
	if (target->rport)
		srp_start_tl_fail_timers(target->rport);
}
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname)
{
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;

	if (ch->connected && !target->qp_in_error) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "failed %s status %s (%d) for CQE %p\n",
			     opname, ib_wc_status_msg(wc->status), wc->status,
			     wc->wr_cqe);
		queue_work(system_long_wq, &target->tl_err_work);
	}
	target->qp_in_error = true;
}
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(shost);
	struct srp_rport *rport = target->rport;
	struct srp_rdma_ch *ch;
	struct srp_request *req;
	struct srp_iu *iu;
	struct srp_cmd *cmd;
	struct ib_device *dev;
	unsigned long flags;
	u32 tag;
	u16 idx;
	int len, ret;
	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;

	/*
	 * The SCSI EH thread is the only context from which srp_queuecommand()
	 * can get invoked for blocked devices (SDEV_BLOCK /
	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
	 * locking the rport mutex if invoked from inside the SCSI EH.
	 */
	if (in_scsi_eh)
		mutex_lock(&rport->mutex);

	scmnd->result = srp_chkready(target->rport);
	if (unlikely(scmnd->result))
		goto err;

	WARN_ON_ONCE(scmnd->request->tag < 0);
	tag = blk_mq_unique_tag(scmnd->request);
	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
	idx = blk_mq_unique_tag_to_tag(tag);
	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
		  dev_name(&shost->shost_gendev), tag, idx,
		  target->req_ring_size);
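	/*
	 * blk_mq_unique_tag() encodes the hardware queue index in the upper
	 * 16 bits and the per-queue tag in the lower 16 bits; these select
	 * the RDMA channel and the request ring slot, respectively.
	 */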
2315 spin_lock_irqsave(&ch->lock, flags);
2316 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2317 spin_unlock_irqrestore(&ch->lock, flags);
2322 req = &ch->req_ring[idx];
2323 dev = target->srp_host->srp_dev->dev;
2324 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2327 scmnd->host_scribble = (void *) req;
2328 cmd = iu->buf;
2330 memset(cmd, 0, sizeof *cmd);
2332 cmd->opcode = SRP_CMD;
2333 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2335 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2337 req->scmnd = scmnd;
2338 req->cmd = iu;
2340 len = srp_map_data(scmnd, ch, req);
2341 if (len < 0) {
2342 shost_printk(KERN_ERR, target->scsi_host,
2343 PFX "Failed to map data (%d)\n", len);
2345 * If we ran out of memory descriptors (-ENOMEM) because an
2346 * application is queuing many requests with more than
2347 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2348 * to reduce queue depth temporarily.
2350 scmnd->result = len == -ENOMEM ?
2351 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2355 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2356 DMA_TO_DEVICE);
2358 if (srp_post_send(ch, iu, len)) {
2359 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2360 scmnd->result = DID_ERROR << 16;
2361 goto err_unmap;
2362 }
2364 ret = 0;
2366 unlock_rport:
2367 if (in_scsi_eh)
2368 mutex_unlock(&rport->mutex);
2370 return ret;
2372 err_unmap:
2373 srp_unmap_data(scmnd, ch, req);
2375 err_iu:
2376 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2379 * Avoid that the loops that iterate over the request ring can
2380 * encounter a dangling SCSI command pointer.
2382 req->scmnd = NULL;
2384 err:
2385 if (scmnd->result) {
2386 scmnd->scsi_done(scmnd);
2387 ret = 0;
2388 } else {
2389 ret = SCSI_MLQUEUE_HOST_BUSY;
2390 }
2392 goto unlock_rport;
2396 * Note: the resources allocated in this function are freed in
2397 * srp_free_ch_ib().
2399 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2401 struct srp_target_port *target = ch->target;
2402 int i;
2404 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2405 GFP_KERNEL);
2406 if (!ch->rx_ring)
2407 goto err_no_ring;
2408 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2409 GFP_KERNEL);
2410 if (!ch->tx_ring)
2411 goto err_no_ring;
2413 for (i = 0; i < target->queue_size; ++i) {
2414 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2415 ch->max_ti_iu_len,
2416 GFP_KERNEL, DMA_FROM_DEVICE);
2417 if (!ch->rx_ring[i])
2418 goto err;
2421 for (i = 0; i < target->queue_size; ++i) {
2422 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2423 target->max_iu_len,
2424 GFP_KERNEL, DMA_TO_DEVICE);
2425 if (!ch->tx_ring[i])
2426 goto err;
2428 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2431 return 0;
2433 err:
2434 for (i = 0; i < target->queue_size; ++i) {
2435 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2436 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2437 }
2440 err_no_ring:
2441 kfree(ch->tx_ring);
2442 ch->tx_ring = NULL;
2443 kfree(ch->rx_ring);
2444 ch->rx_ring = NULL;
2446 return -ENOMEM;
2449 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2451 uint64_t T_tr_ns, max_compl_time_ms;
2452 uint32_t rq_tmo_jiffies;
2455 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2456 * table 91), both the QP timeout and the retry count have to be set
2457 * for RC QP's during the RTR to RTS transition.
2459 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2460 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2463 * Set target->rq_tmo_jiffies to one second more than the largest time
2464 * it can take before an error completion is generated. See also
2465 * C9-140..142 in the IBTA spec for more information about how to
2466 * convert the QP Local ACK Timeout value to nanoseconds.
2468 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2469 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2470 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2471 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2473 return rq_tmo_jiffies;
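/*
 * Worked example (illustrative values, not taken from any particular HCA):
 * with qp_attr->timeout = 19 and qp_attr->retry_cnt = 7, T_tr_ns =
 * 4096 * 2^19 ns ~= 2.1 s, so max_compl_time_ms ~= 7 * 4 * 2147 ms ~= 60 s
 * and srp_compute_rq_tmo() returns roughly msecs_to_jiffies(61000).
 */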
2476 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2477 const struct srp_login_rsp *lrsp,
2478 struct srp_rdma_ch *ch)
2480 struct srp_target_port *target = ch->target;
2481 struct ib_qp_attr *qp_attr = NULL;
2482 int attr_mask = 0;
2483 int ret = 0;
2484 int i;
2486 if (lrsp->opcode == SRP_LOGIN_RSP) {
2487 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2488 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2491 * Reserve credits for task management so we don't
2492 * bounce requests back to the SCSI mid-layer.
2494 target->scsi_host->can_queue
2495 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2496 target->scsi_host->can_queue);
2497 target->scsi_host->cmd_per_lun
2498 = min_t(int, target->scsi_host->can_queue,
2499 target->scsi_host->cmd_per_lun);
2501 shost_printk(KERN_WARNING, target->scsi_host,
2502 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2508 ret = srp_alloc_iu_bufs(ch);
2509 if (ret)
2510 goto error;
2513 for (i = 0; i < target->queue_size; i++) {
2514 struct srp_iu *iu = ch->rx_ring[i];
2516 ret = srp_post_recv(ch, iu);
2517 if (ret)
2518 goto error;
2521 if (!target->using_rdma_cm) {
2522 ret = -ENOMEM;
2523 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2524 if (!qp_attr)
2525 goto error;
2527 qp_attr->qp_state = IB_QPS_RTR;
2528 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2529 if (ret)
2530 goto error_free;
2532 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2533 if (ret)
2534 goto error_free;
2536 qp_attr->qp_state = IB_QPS_RTS;
2537 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2538 if (ret)
2539 goto error_free;
2541 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2543 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2544 if (ret)
2545 goto error_free;
2547 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2549 error_free:
2550 kfree(qp_attr);
2552 error:
2553 ch->status = ret;
2557 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2558 const struct ib_cm_event *event,
2559 struct srp_rdma_ch *ch)
2561 struct srp_target_port *target = ch->target;
2562 struct Scsi_Host *shost = target->scsi_host;
2563 struct ib_class_port_info *cpi;
2564 int opcode;
2565 u16 dlid;
2567 switch (event->param.rej_rcvd.reason) {
2568 case IB_CM_REJ_PORT_CM_REDIRECT:
2569 cpi = event->param.rej_rcvd.ari;
2570 dlid = be16_to_cpu(cpi->redirect_lid);
2571 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2572 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2573 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2574 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
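/*
 * A redirect LID of zero means the ARI carried only a usable GID, so
 * report SRP_PORT_REDIRECT (the caller re-resolves the path) instead of
 * SRP_DLID_REDIRECT (the caller retries the connect directly).
 */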
2576 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2577 break;
2579 case IB_CM_REJ_PORT_REDIRECT:
2580 if (srp_target_is_topspin(target)) {
2581 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2584 * Topspin/Cisco SRP gateways incorrectly send
2585 * reject reason code 25 when they mean 24
2586 * (port redirect).
2588 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2590 shost_printk(KERN_DEBUG, shost,
2591 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2592 be64_to_cpu(dgid->global.subnet_prefix),
2593 be64_to_cpu(dgid->global.interface_id));
2595 ch->status = SRP_PORT_REDIRECT;
2596 } else {
2597 shost_printk(KERN_WARNING, shost,
2598 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2599 ch->status = -ECONNRESET;
2600 }
2601 break;
2603 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2604 shost_printk(KERN_WARNING, shost,
2605 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2606 ch->status = -ECONNRESET;
2607 break;
2609 case IB_CM_REJ_CONSUMER_DEFINED:
2610 opcode = *(u8 *) event->private_data;
2611 if (opcode == SRP_LOGIN_REJ) {
2612 struct srp_login_rej *rej = event->private_data;
2613 u32 reason = be32_to_cpu(rej->reason);
2615 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2616 shost_printk(KERN_WARNING, shost,
2617 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2619 shost_printk(KERN_WARNING, shost, PFX
2620 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2621 target->sgid.raw,
2622 target->ib_cm.orig_dgid.raw,
2623 reason);
2625 shost_printk(KERN_WARNING, shost,
2626 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2627 " opcode 0x%02x\n", opcode);
2628 ch->status = -ECONNRESET;
2629 break;
2631 case IB_CM_REJ_STALE_CONN:
2632 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2633 ch->status = SRP_STALE_CONN;
2634 break;
2636 default:
2637 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2638 event->param.rej_rcvd.reason);
2639 ch->status = -ECONNRESET;
2643 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2644 const struct ib_cm_event *event)
2646 struct srp_rdma_ch *ch = cm_id->context;
2647 struct srp_target_port *target = ch->target;
2648 int comp = 0;
2650 switch (event->event) {
2651 case IB_CM_REQ_ERROR:
2652 shost_printk(KERN_DEBUG, target->scsi_host,
2653 PFX "Sending CM REQ failed\n");
2654 comp = 1;
2655 ch->status = -ECONNRESET;
2656 break;
2658 case IB_CM_REP_RECEIVED:
2659 comp = 1;
2660 srp_cm_rep_handler(cm_id, event->private_data, ch);
2661 break;
2663 case IB_CM_REJ_RECEIVED:
2664 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2665 comp = 1;
2667 srp_ib_cm_rej_handler(cm_id, event, ch);
2668 break;
2670 case IB_CM_DREQ_RECEIVED:
2671 shost_printk(KERN_WARNING, target->scsi_host,
2672 PFX "DREQ received - connection closed\n");
2673 ch->connected = false;
2674 if (ib_send_cm_drep(cm_id, NULL, 0))
2675 shost_printk(KERN_ERR, target->scsi_host,
2676 PFX "Sending CM DREP failed\n");
2677 queue_work(system_long_wq, &target->tl_err_work);
2678 break;
2680 case IB_CM_TIMEWAIT_EXIT:
2681 shost_printk(KERN_ERR, target->scsi_host,
2682 PFX "connection closed\n");
2688 case IB_CM_MRA_RECEIVED:
2689 case IB_CM_DREQ_ERROR:
2690 case IB_CM_DREP_RECEIVED:
2691 break;
2693 default:
2694 shost_printk(KERN_WARNING, target->scsi_host,
2695 PFX "Unhandled CM event %d\n", event->event);
2700 complete(&ch->done);
2705 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2706 struct rdma_cm_event *event)
2708 struct srp_target_port *target = ch->target;
2709 struct Scsi_Host *shost = target->scsi_host;
2710 int opcode;
2712 switch (event->status) {
2713 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2714 shost_printk(KERN_WARNING, shost,
2715 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2716 ch->status = -ECONNRESET;
2717 break;
2719 case IB_CM_REJ_CONSUMER_DEFINED:
2720 opcode = *(u8 *) event->param.conn.private_data;
2721 if (opcode == SRP_LOGIN_REJ) {
2722 struct srp_login_rej *rej =
2723 (struct srp_login_rej *)
2724 event->param.conn.private_data;
2725 u32 reason = be32_to_cpu(rej->reason);
2727 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2728 shost_printk(KERN_WARNING, shost,
2729 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2731 shost_printk(KERN_WARNING, shost,
2732 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2734 shost_printk(KERN_WARNING, shost,
2735 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2738 ch->status = -ECONNRESET;
2739 break;
2741 case IB_CM_REJ_STALE_CONN:
2742 shost_printk(KERN_WARNING, shost,
2743 " REJ reason: stale connection\n");
2744 ch->status = SRP_STALE_CONN;
2745 break;
2747 default:
2748 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2749 event->status);
2750 ch->status = -ECONNRESET;
2755 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2756 struct rdma_cm_event *event)
2758 struct srp_rdma_ch *ch = cm_id->context;
2759 struct srp_target_port *target = ch->target;
2760 int comp = 0;
2762 switch (event->event) {
2763 case RDMA_CM_EVENT_ADDR_RESOLVED:
2764 ch->status = 0;
2765 comp = 1;
2766 break;
2768 case RDMA_CM_EVENT_ADDR_ERROR:
2769 ch->status = -ENXIO;
2770 comp = 1;
2771 break;
2773 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2774 ch->status = 0;
2775 comp = 1;
2776 break;
2778 case RDMA_CM_EVENT_ROUTE_ERROR:
2779 case RDMA_CM_EVENT_UNREACHABLE:
2780 ch->status = -EHOSTUNREACH;
2781 comp = 1;
2782 break;
2784 case RDMA_CM_EVENT_CONNECT_ERROR:
2785 shost_printk(KERN_DEBUG, target->scsi_host,
2786 PFX "Sending CM REQ failed\n");
2787 comp = 1;
2788 ch->status = -ECONNRESET;
2789 break;
2791 case RDMA_CM_EVENT_ESTABLISHED:
2792 comp = 1;
2793 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2794 break;
2796 case RDMA_CM_EVENT_REJECTED:
2797 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2798 comp = 1;
2800 srp_rdma_cm_rej_handler(ch, event);
2801 break;
2803 case RDMA_CM_EVENT_DISCONNECTED:
2804 if (ch->connected) {
2805 shost_printk(KERN_WARNING, target->scsi_host,
2806 PFX "received DREQ\n");
2807 rdma_disconnect(ch->rdma_cm.cm_id);
2810 queue_work(system_long_wq, &target->tl_err_work);
2811 }
2812 break;
2814 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2815 shost_printk(KERN_ERR, target->scsi_host,
2816 PFX "connection closed\n");
2823 shost_printk(KERN_WARNING, target->scsi_host,
2824 PFX "Unhandled CM event %d\n", event->event);
2829 complete(&ch->done);
2835 * srp_change_queue_depth - set the device queue depth
2836 * @sdev: scsi device struct
2837 * @qdepth: requested queue depth
2839 * Returns queue depth.
2842 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2844 if (!sdev->tagged_supported)
2845 qdepth = 1;
2846 return scsi_change_queue_depth(sdev, qdepth);
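/*
 * Usage note (hypothetical example): writing 64 to
 * /sys/block/sdX/device/queue_depth for a tagged device ends up here and
 * scsi_change_queue_depth() applies the new depth; untagged devices are
 * pinned to a queue depth of one above.
 */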
2849 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2850 u8 func, u8 *status)
2852 struct srp_target_port *target = ch->target;
2853 struct srp_rport *rport = target->rport;
2854 struct ib_device *dev = target->srp_host->srp_dev->dev;
2855 struct srp_iu *iu;
2856 struct srp_tsk_mgmt *tsk_mgmt;
2857 int res;
2859 if (!ch->connected || target->qp_in_error)
2860 return -1;
2863 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2864 * invoked while a task management function is being sent.
2866 mutex_lock(&rport->mutex);
2867 spin_lock_irq(&ch->lock);
2868 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2869 spin_unlock_irq(&ch->lock);
2871 if (!iu) {
2872 mutex_unlock(&rport->mutex);
2874 return -1;
2875 }
2877 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2878 DMA_TO_DEVICE);
2879 tsk_mgmt = iu->buf;
2880 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2882 tsk_mgmt->opcode = SRP_TSK_MGMT;
2883 int_to_scsilun(lun, &tsk_mgmt->lun);
2884 tsk_mgmt->tsk_mgmt_func = func;
2885 tsk_mgmt->task_tag = req_tag;
2887 spin_lock_irq(&ch->lock);
2888 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2889 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2890 spin_unlock_irq(&ch->lock);
2892 init_completion(&ch->tsk_mgmt_done);
2894 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2895 DMA_TO_DEVICE);
2896 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2897 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2898 mutex_unlock(&rport->mutex);
2900 return -1;
2901 }
2902 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2903 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2904 if (res > 0 && status)
2905 *status = ch->tsk_mgmt_status;
2906 mutex_unlock(&rport->mutex);
2908 WARN_ON_ONCE(res < 0);
2910 return res > 0 ? 0 : -1;
2913 static int srp_abort(struct scsi_cmnd *scmnd)
2915 struct srp_target_port *target = host_to_target(scmnd->device->host);
2916 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2917 u32 tag;
2918 u16 ch_idx;
2919 struct srp_rdma_ch *ch;
2920 int ret;
2922 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2924 if (!req)
2925 return SUCCESS;
2926 tag = blk_mq_unique_tag(scmnd->request);
2927 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2928 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2929 return SUCCESS;
2930 ch = &target->ch[ch_idx];
2931 if (!srp_claim_req(ch, req, NULL, scmnd))
2932 return SUCCESS;
2933 shost_printk(KERN_ERR, target->scsi_host,
2934 "Sending SRP abort for tag %#x\n", tag);
2935 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2936 SRP_TSK_ABORT_TASK, NULL) == 0)
2937 ret = SUCCESS;
2938 else if (target->rport->state == SRP_RPORT_LOST)
2939 ret = FAST_IO_FAIL;
2940 else
2941 ret = FAILED;
2942 if (ret == SUCCESS) {
2943 srp_free_req(ch, req, scmnd, 0);
2944 scmnd->result = DID_ABORT << 16;
2945 scmnd->scsi_done(scmnd);
2946 }
2948 return ret;
2951 static int srp_reset_device(struct scsi_cmnd *scmnd)
2953 struct srp_target_port *target = host_to_target(scmnd->device->host);
2954 struct srp_rdma_ch *ch;
2955 u8 status;
2957 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2959 ch = &target->ch[0];
2960 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2961 SRP_TSK_LUN_RESET, &status))
2962 return FAILED;
2963 if (status)
2964 return FAILED;
2966 return SUCCESS;
2969 static int srp_reset_host(struct scsi_cmnd *scmnd)
2971 struct srp_target_port *target = host_to_target(scmnd->device->host);
2973 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2975 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2978 static int srp_target_alloc(struct scsi_target *starget)
2980 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
2981 struct srp_target_port *target = host_to_target(shost);
2983 if (target->target_can_queue)
2984 starget->can_queue = target->target_can_queue;
2986 return 0;
2988 static int srp_slave_alloc(struct scsi_device *sdev)
2990 struct Scsi_Host *shost = sdev->host;
2991 struct srp_target_port *target = host_to_target(shost);
2992 struct srp_device *srp_dev = target->srp_host->srp_dev;
2993 struct ib_device *ibdev = srp_dev->dev;
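/*
 * Unless the HCA supports registration of gappy S/G lists
 * (IB_DEVICE_SG_GAPS_REG), every S/G element except the first and last
 * must be aligned on an mr_page_size boundary; the call below makes the
 * block layer honor that restriction.
 */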
2995 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2996 blk_queue_virt_boundary(sdev->request_queue,
2997 ~srp_dev->mr_page_mask);
2999 return 0;
3002 static int srp_slave_configure(struct scsi_device *sdev)
3004 struct Scsi_Host *shost = sdev->host;
3005 struct srp_target_port *target = host_to_target(shost);
3006 struct request_queue *q = sdev->request_queue;
3007 unsigned long timeout;
3009 if (sdev->type == TYPE_DISK) {
3010 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3011 blk_queue_rq_timeout(q, timeout);
3012 }
3014 return 0;
3017 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3020 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3022 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3025 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3028 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3030 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3033 static ssize_t show_service_id(struct device *dev,
3034 struct device_attribute *attr, char *buf)
3036 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3038 if (target->using_rdma_cm)
3039 return -ENOENT;
3040 return sprintf(buf, "0x%016llx\n",
3041 be64_to_cpu(target->ib_cm.service_id));
3044 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3047 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3049 if (target->using_rdma_cm)
3050 return -ENOENT;
3051 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3054 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3057 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3059 return sprintf(buf, "%pI6\n", target->sgid.raw);
3062 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3065 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3066 struct srp_rdma_ch *ch = &target->ch[0];
3068 if (target->using_rdma_cm)
3069 return -ENOENT;
3070 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3073 static ssize_t show_orig_dgid(struct device *dev,
3074 struct device_attribute *attr, char *buf)
3076 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3078 if (target->using_rdma_cm)
3079 return -ENOENT;
3080 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3083 static ssize_t show_req_lim(struct device *dev,
3084 struct device_attribute *attr, char *buf)
3086 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3087 struct srp_rdma_ch *ch;
3088 int i, req_lim = INT_MAX;
3090 for (i = 0; i < target->ch_count; i++) {
3091 ch = &target->ch[i];
3092 req_lim = min(req_lim, ch->req_lim);
3094 return sprintf(buf, "%d\n", req_lim);
3097 static ssize_t show_zero_req_lim(struct device *dev,
3098 struct device_attribute *attr, char *buf)
3100 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3102 return sprintf(buf, "%d\n", target->zero_req_lim);
3105 static ssize_t show_local_ib_port(struct device *dev,
3106 struct device_attribute *attr, char *buf)
3108 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3110 return sprintf(buf, "%d\n", target->srp_host->port);
3113 static ssize_t show_local_ib_device(struct device *dev,
3114 struct device_attribute *attr, char *buf)
3116 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3118 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
3121 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3124 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3126 return sprintf(buf, "%d\n", target->ch_count);
3129 static ssize_t show_comp_vector(struct device *dev,
3130 struct device_attribute *attr, char *buf)
3132 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3134 return sprintf(buf, "%d\n", target->comp_vector);
3137 static ssize_t show_tl_retry_count(struct device *dev,
3138 struct device_attribute *attr, char *buf)
3140 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3142 return sprintf(buf, "%d\n", target->tl_retry_count);
3145 static ssize_t show_cmd_sg_entries(struct device *dev,
3146 struct device_attribute *attr, char *buf)
3148 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3150 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3153 static ssize_t show_allow_ext_sg(struct device *dev,
3154 struct device_attribute *attr, char *buf)
3156 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3158 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3161 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
3162 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
3163 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
3164 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
3165 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
3166 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
3167 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
3168 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
3169 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
3170 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
3171 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3172 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
3173 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
3174 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
3175 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
3176 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
3178 static struct device_attribute *srp_host_attrs[] = {
3179 &dev_attr_id_ext,
3180 &dev_attr_ioc_guid,
3181 &dev_attr_service_id,
3182 &dev_attr_pkey,
3183 &dev_attr_sgid,
3184 &dev_attr_dgid,
3185 &dev_attr_orig_dgid,
3186 &dev_attr_req_lim,
3187 &dev_attr_zero_req_lim,
3188 &dev_attr_local_ib_port,
3189 &dev_attr_local_ib_device,
3190 &dev_attr_ch_count,
3191 &dev_attr_comp_vector,
3192 &dev_attr_tl_retry_count,
3193 &dev_attr_cmd_sg_entries,
3194 &dev_attr_allow_ext_sg,
3195 NULL
3196 };
3198 static struct scsi_host_template srp_template = {
3199 .module = THIS_MODULE,
3200 .name = "InfiniBand SRP initiator",
3201 .proc_name = DRV_NAME,
3202 .target_alloc = srp_target_alloc,
3203 .slave_alloc = srp_slave_alloc,
3204 .slave_configure = srp_slave_configure,
3205 .info = srp_target_info,
3206 .queuecommand = srp_queuecommand,
3207 .change_queue_depth = srp_change_queue_depth,
3208 .eh_timed_out = srp_timed_out,
3209 .eh_abort_handler = srp_abort,
3210 .eh_device_reset_handler = srp_reset_device,
3211 .eh_host_reset_handler = srp_reset_host,
3212 .skip_settle_delay = true,
3213 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
3214 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
3215 .this_id = -1,
3216 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
3217 .use_clustering = ENABLE_CLUSTERING,
3218 .shost_attrs = srp_host_attrs,
3219 .track_queue_depth = 1,
3222 static int srp_sdev_count(struct Scsi_Host *host)
3224 struct scsi_device *sdev;
3225 int c = 0;
3227 shost_for_each_device(sdev, host)
3228 c++;
3230 return c;
3235 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3236 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3237 * removal has been scheduled.
3238 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3240 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3242 struct srp_rport_identifiers ids;
3243 struct srp_rport *rport;
3245 target->state = SRP_TARGET_SCANNING;
3246 sprintf(target->target_name, "SRP.T10:%016llX",
3247 be64_to_cpu(target->id_ext));
3249 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3250 return -ENODEV;
3252 memcpy(ids.port_id, &target->id_ext, 8);
3253 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3254 ids.roles = SRP_RPORT_ROLE_TARGET;
3255 rport = srp_rport_add(target->scsi_host, &ids);
3256 if (IS_ERR(rport)) {
3257 scsi_remove_host(target->scsi_host);
3258 return PTR_ERR(rport);
3261 rport->lld_data = target;
3262 target->rport = rport;
3264 spin_lock(&host->target_lock);
3265 list_add_tail(&target->list, &host->target_list);
3266 spin_unlock(&host->target_lock);
3268 scsi_scan_target(&target->scsi_host->shost_gendev,
3269 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3271 if (srp_connected_ch(target) < target->ch_count ||
3272 target->qp_in_error) {
3273 shost_printk(KERN_INFO, target->scsi_host,
3274 PFX "SCSI scan failed - removing SCSI host\n");
3275 srp_queue_remove_work(target);
3276 goto out;
3277 }
3279 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3280 dev_name(&target->scsi_host->shost_gendev),
3281 srp_sdev_count(target->scsi_host));
3283 spin_lock_irq(&target->lock);
3284 if (target->state == SRP_TARGET_SCANNING)
3285 target->state = SRP_TARGET_LIVE;
3286 spin_unlock_irq(&target->lock);
3288 out:
3289 return 0;
3292 static void srp_release_dev(struct device *dev)
3294 struct srp_host *host =
3295 container_of(dev, struct srp_host, dev);
3297 complete(&host->released);
3300 static struct class srp_class = {
3301 .name = "infiniband_srp",
3302 .dev_release = srp_release_dev
3306 * srp_conn_unique() - check whether the connection to a target is unique
3307 * @host: SRP host.
3308 * @target: SRP target port.
3310 static bool srp_conn_unique(struct srp_host *host,
3311 struct srp_target_port *target)
3313 struct srp_target_port *t;
3314 bool ret = false;
3316 if (target->state == SRP_TARGET_REMOVED)
3317 goto out;
3319 ret = true;
3321 spin_lock(&host->target_lock);
3322 list_for_each_entry(t, &host->target_list, list) {
3323 if (t != target &&
3324 target->id_ext == t->id_ext &&
3325 target->ioc_guid == t->ioc_guid &&
3326 target->initiator_ext == t->initiator_ext) {
3327 ret = false;
3328 break;
3329 }
3330 }
3331 spin_unlock(&host->target_lock);
3333 out:
3334 return ret;
3338 * Target ports are added by writing
3340 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3341 * pkey=<P_Key>,service_id=<service ID>
3343 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3344 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3346 * to the add_target sysfs attribute.
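* Example with made-up identifiers (the GUIDs, GID and sysfs device name
* below are placeholders, not values from a real fabric):
*
*     echo "id_ext=200400a0b81146a1,ioc_guid=0002c903000f1366,\
*         dgid=fe800000000000000002c903000f1367,pkey=ffff,\
*         service_id=0002c903000f1366" > \
*         /sys/class/infiniband_srp/srp-mlx5_0-1/add_target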
3350 SRP_OPT_ID_EXT = 1 << 0,
3351 SRP_OPT_IOC_GUID = 1 << 1,
3352 SRP_OPT_DGID = 1 << 2,
3353 SRP_OPT_PKEY = 1 << 3,
3354 SRP_OPT_SERVICE_ID = 1 << 4,
3355 SRP_OPT_MAX_SECT = 1 << 5,
3356 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3357 SRP_OPT_IO_CLASS = 1 << 7,
3358 SRP_OPT_INITIATOR_EXT = 1 << 8,
3359 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3360 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3361 SRP_OPT_SG_TABLESIZE = 1 << 11,
3362 SRP_OPT_COMP_VECTOR = 1 << 12,
3363 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3364 SRP_OPT_QUEUE_SIZE = 1 << 14,
3365 SRP_OPT_IP_SRC = 1 << 15,
3366 SRP_OPT_IP_DEST = 1 << 16,
3367 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3370 static unsigned int srp_opt_mandatory[] = {
3371 SRP_OPT_ID_EXT |
3372 SRP_OPT_IOC_GUID |
3373 SRP_OPT_DGID |
3374 SRP_OPT_PKEY |
3375 SRP_OPT_SERVICE_ID,
3376 SRP_OPT_ID_EXT |
3377 SRP_OPT_IOC_GUID |
3378 SRP_OPT_IP_DEST,
3379 };
3381 static const match_table_t srp_opt_tokens = {
3382 { SRP_OPT_ID_EXT, "id_ext=%s" },
3383 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3384 { SRP_OPT_DGID, "dgid=%s" },
3385 { SRP_OPT_PKEY, "pkey=%x" },
3386 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3387 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3388 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3389 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3390 { SRP_OPT_IO_CLASS, "io_class=%x" },
3391 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3392 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3393 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3394 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3395 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3396 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3397 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3398 { SRP_OPT_IP_SRC, "src=%s" },
3399 { SRP_OPT_IP_DEST, "dest=%s" },
3400 { SRP_OPT_ERR, NULL }
3404 * srp_parse_in - parse an IP address and port number combination
3405 * @net: [in] Network namespace.
3406 * @sa: [out] Address family, IP address and port number.
3407 * @addr_port_str: [in] IP address and port number.
3408 * @has_port: [out] Whether or not @addr_port_str includes a port number.
3410 * Parse the following address formats:
3411 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3412 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3414 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3415 const char *addr_port_str, bool *has_port)
3417 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3418 char *port_str;
3419 int ret;
3421 if (!addr)
3422 return -ENOMEM;
3423 port_str = strrchr(addr, ':');
3424 if (port_str && strchr(port_str, ']'))
3425 port_str = NULL;
3426 if (port_str)
3427 *port_str++ = '\0';
3429 *has_port = port_str != NULL;
3430 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3431 if (ret && addr[0]) {
3432 addr_end = addr + strlen(addr) - 1;
3433 if (addr[0] == '[' && *addr_end == ']') {
3434 *addr_end = '\0';
3435 ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3438 kfree(addr);
3440 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3442 return ret;
3444 static int srp_parse_options(struct net *net, const char *buf,
3445 struct srp_target_port *target)
3447 char *options, *sep_opt;
3448 char *p;
3449 substring_t args[MAX_OPT_ARGS];
3450 unsigned long long ull;
3451 int opt_mask = 0;
3452 int token;
3453 int ret = -EINVAL;
3454 int i;
3457 options = kstrdup(buf, GFP_KERNEL);
3458 if (!options)
3459 return -ENOMEM;
3461 sep_opt = options;
3462 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3466 token = match_token(p, srp_opt_tokens, args);
3467 opt_mask |= token;
3469 switch (token) {
3470 case SRP_OPT_ID_EXT:
3471 p = match_strdup(args);
3476 ret = kstrtoull(p, 16, &ull);
3477 if (ret) {
3478 pr_warn("invalid id_ext parameter '%s'\n", p);
3479 kfree(p);
3480 goto out;
3481 }
3482 target->id_ext = cpu_to_be64(ull);
3483 kfree(p);
3484 break;
3486 case SRP_OPT_IOC_GUID:
3487 p = match_strdup(args);
3492 ret = kstrtoull(p, 16, &ull);
3493 if (ret) {
3494 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3495 kfree(p);
3496 goto out;
3497 }
3498 target->ioc_guid = cpu_to_be64(ull);
3499 kfree(p);
3500 break;
3502 case SRP_OPT_DGID:
3503 p = match_strdup(args);
3508 if (strlen(p) != 32) {
3509 pr_warn("bad dest GID parameter '%s'\n", p);
3510 kfree(p);
3511 goto out;
3512 }
3514 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3515 kfree(p);
3516 if (ret < 0)
3517 goto out;
3518 break;
3520 case SRP_OPT_PKEY:
3521 if (match_hex(args, &token)) {
3522 pr_warn("bad P_Key parameter '%s'\n", p);
3523 goto out;
3524 }
3525 target->ib_cm.pkey = cpu_to_be16(token);
3526 break;
3528 case SRP_OPT_SERVICE_ID:
3529 p = match_strdup(args);
3534 ret = kstrtoull(p, 16, &ull);
3535 if (ret) {
3536 pr_warn("bad service_id parameter '%s'\n", p);
3537 kfree(p);
3538 goto out;
3539 }
3540 target->ib_cm.service_id = cpu_to_be64(ull);
3541 kfree(p);
3542 break;
3544 case SRP_OPT_IP_SRC:
3545 p = match_strdup(args);
3550 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
3553 pr_warn("bad source parameter '%s'\n", p);
3557 target->rdma_cm.src_specified = true;
3558 kfree(p);
3559 break;
3561 case SRP_OPT_IP_DEST:
3562 p = match_strdup(args);
3567 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
3572 pr_warn("bad dest parameter '%s'\n", p);
3576 target->using_rdma_cm = true;
3577 kfree(p);
3578 break;
3580 case SRP_OPT_MAX_SECT:
3581 if (match_int(args, &token)) {
3582 pr_warn("bad max sect parameter '%s'\n", p);
3585 target->scsi_host->max_sectors = token;
3586 break;
3588 case SRP_OPT_QUEUE_SIZE:
3589 if (match_int(args, &token) || token < 1) {
3590 pr_warn("bad queue_size parameter '%s'\n", p);
3593 target->scsi_host->can_queue = token;
3594 target->queue_size = token + SRP_RSP_SQ_SIZE +
3595 SRP_TSK_MGMT_SQ_SIZE;
3596 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3597 target->scsi_host->cmd_per_lun = token;
3598 break;
3600 case SRP_OPT_MAX_CMD_PER_LUN:
3601 if (match_int(args, &token) || token < 1) {
3602 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3603 p);
3604 goto out;
3605 }
3606 target->scsi_host->cmd_per_lun = token;
3607 break;
3609 case SRP_OPT_TARGET_CAN_QUEUE:
3610 if (match_int(args, &token) || token < 1) {
3611 pr_warn("bad max target_can_queue parameter '%s'\n",
3612 p);
3613 goto out;
3614 }
3615 target->target_can_queue = token;
3616 break;
3618 case SRP_OPT_IO_CLASS:
3619 if (match_hex(args, &token)) {
3620 pr_warn("bad IO class parameter '%s'\n", p);
3623 if (token != SRP_REV10_IB_IO_CLASS &&
3624 token != SRP_REV16A_IB_IO_CLASS) {
3625 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3626 token, SRP_REV10_IB_IO_CLASS,
3627 SRP_REV16A_IB_IO_CLASS);
3630 target->io_class = token;
3631 break;
3633 case SRP_OPT_INITIATOR_EXT:
3634 p = match_strdup(args);
3639 ret = kstrtoull(p, 16, &ull);
3640 if (ret) {
3641 pr_warn("bad initiator_ext value '%s'\n", p);
3642 kfree(p);
3643 goto out;
3644 }
3645 target->initiator_ext = cpu_to_be64(ull);
3646 kfree(p);
3647 break;
3649 case SRP_OPT_CMD_SG_ENTRIES:
3650 if (match_int(args, &token) || token < 1 || token > 255) {
3651 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3652 p);
3653 goto out;
3654 }
3655 target->cmd_sg_cnt = token;
3656 break;
3658 case SRP_OPT_ALLOW_EXT_SG:
3659 if (match_int(args, &token)) {
3660 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3663 target->allow_ext_sg = !!token;
3664 break;
3666 case SRP_OPT_SG_TABLESIZE:
3667 if (match_int(args, &token) || token < 1 ||
3668 token > SG_MAX_SEGMENTS) {
3669 pr_warn("bad max sg_tablesize parameter '%s'\n",
3670 p);
3671 goto out;
3672 }
3673 target->sg_tablesize = token;
3674 break;
3676 case SRP_OPT_COMP_VECTOR:
3677 if (match_int(args, &token) || token < 0) {
3678 pr_warn("bad comp_vector parameter '%s'\n", p);
3681 target->comp_vector = token;
3682 break;
3684 case SRP_OPT_TL_RETRY_COUNT:
3685 if (match_int(args, &token) || token < 2 || token > 7) {
3686 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3687 p);
3688 goto out;
3689 }
3690 target->tl_retry_count = token;
3691 break;
3693 default:
3694 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3695 p);
3696 goto out;
3700 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3701 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3702 ret = 0;
3703 break;
3704 }
3705 }
3706 if (ret)
3707 pr_warn("target creation request is missing one or more parameters\n");
3709 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3710 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3711 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3712 target->scsi_host->cmd_per_lun,
3713 target->scsi_host->can_queue);
3720 static ssize_t srp_create_target(struct device *dev,
3721 struct device_attribute *attr,
3722 const char *buf, size_t count)
3724 struct srp_host *host =
3725 container_of(dev, struct srp_host, dev);
3726 struct Scsi_Host *target_host;
3727 struct srp_target_port *target;
3728 struct srp_rdma_ch *ch;
3729 struct srp_device *srp_dev = host->srp_dev;
3730 struct ib_device *ibdev = srp_dev->dev;
3731 int ret, node_idx, node, cpu, i;
3732 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3733 bool multich = false;
3735 target_host = scsi_host_alloc(&srp_template,
3736 sizeof (struct srp_target_port));
3737 if (!target_host)
3738 return -ENOMEM;
3740 target_host->transportt = ib_srp_transport_template;
3741 target_host->max_channel = 0;
3742 target_host->max_id = 1;
3743 target_host->max_lun = -1LL;
3744 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3746 target = host_to_target(target_host);
3748 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3749 target->io_class = SRP_REV16A_IB_IO_CLASS;
3750 target->scsi_host = target_host;
3751 target->srp_host = host;
3752 target->lkey = host->srp_dev->pd->local_dma_lkey;
3753 target->global_rkey = host->srp_dev->global_rkey;
3754 target->cmd_sg_cnt = cmd_sg_entries;
3755 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3756 target->allow_ext_sg = allow_ext_sg;
3757 target->tl_retry_count = 7;
3758 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3761 * Avoid that the SCSI host can be removed by srp_remove_target()
3762 * before this function returns.
3764 scsi_host_get(target->scsi_host);
3766 ret = mutex_lock_interruptible(&host->add_target_mutex);
3767 if (ret < 0)
3768 goto out;
3770 ret = srp_parse_options(target->net, buf, target);
3771 if (ret)
3772 goto out;
3774 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3776 if (!srp_conn_unique(target->srp_host, target)) {
3777 if (target->using_rdma_cm) {
3778 shost_printk(KERN_INFO, target->scsi_host,
3779 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3780 be64_to_cpu(target->id_ext),
3781 be64_to_cpu(target->ioc_guid),
3782 &target->rdma_cm.dst);
3784 shost_printk(KERN_INFO, target->scsi_host,
3785 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3786 be64_to_cpu(target->id_ext),
3787 be64_to_cpu(target->ioc_guid),
3788 be64_to_cpu(target->initiator_ext));
3790 ret = -EEXIST;
3791 goto out;
3794 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3795 target->cmd_sg_cnt < target->sg_tablesize) {
3796 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3797 target->sg_tablesize = target->cmd_sg_cnt;
3800 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3801 bool gaps_reg = (ibdev->attrs.device_cap_flags &
3802 IB_DEVICE_SG_GAPS_REG);
3804 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3805 (ilog2(srp_dev->mr_page_size) - 9);
3808 * FR and FMR can only map one HCA page per entry. If
3809 * the start address is not aligned on a HCA page
3810 * boundary two entries will be used for the head and
3811 * the tail although these two entries combined
3812 * contain at most one HCA page of data. Hence the "+
3813 * 1" in the calculation below.
3815 * The indirect data buffer descriptor is contiguous
3816 * so the memory for that buffer will only be
3817 * registered if register_always is true. Hence add
3818 * one to mr_per_cmd if register_always has been set.
3820 mr_per_cmd = register_always +
3821 (target->scsi_host->max_sectors + 1 +
3822 max_sectors_per_mr - 1) / max_sectors_per_mr;
3824 mr_per_cmd = register_always +
3825 (target->sg_tablesize +
3826 srp_dev->max_pages_per_mr - 1) /
3827 srp_dev->max_pages_per_mr;
3829 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3830 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3831 max_sectors_per_mr, mr_per_cmd);
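/*
 * Worked example (hypothetical HCA limits): with mr_page_size = 4096,
 * max_pages_per_mr = 256 and max_sectors = 4096, one MR spans
 * 256 << (12 - 9) = 2048 sectors, so mr_per_cmd evaluates to
 * register_always + (4096 + 1 + 2047) / 2048 = register_always + 3.
 */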
3834 target_host->sg_tablesize = target->sg_tablesize;
3835 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3836 target->mr_per_cmd = mr_per_cmd;
3837 target->indirect_size = target->sg_tablesize *
3838 sizeof (struct srp_direct_buf);
3839 target->max_iu_len = sizeof (struct srp_cmd) +
3840 sizeof (struct srp_indirect_buf) +
3841 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
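/*
 * Illustrative arithmetic: struct srp_cmd is 48 bytes, struct
 * srp_indirect_buf 20 bytes and struct srp_direct_buf 16 bytes, so the
 * default cmd_sg_cnt of 12 yields max_iu_len = 48 + 20 + 12 * 16 = 260
 * bytes.
 */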
3843 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3844 INIT_WORK(&target->remove_work, srp_remove_work);
3845 spin_lock_init(&target->lock);
3846 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3847 if (ret)
3848 goto out;
3851 target->ch_count = max_t(unsigned, num_online_nodes(),
3852 min(ch_count ? :
3853 min(4 * num_online_nodes(),
3854 ibdev->num_comp_vectors),
3855 num_online_cpus()));
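/*
 * Illustrative sizing (hypothetical machine): two NUMA nodes, 16 online
 * CPUs and an HCA exposing 8 completion vectors, with the ch_count
 * module parameter left at zero, give max(2, min(min(4 * 2, 8), 16)) =
 * 8 RDMA channels.
 */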
3856 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3857 GFP_KERNEL);
3858 if (!target->ch)
3859 goto out;
3861 node_idx = 0;
3862 for_each_online_node(node) {
3863 const int ch_start = (node_idx * target->ch_count /
3864 num_online_nodes());
3865 const int ch_end = ((node_idx + 1) * target->ch_count /
3866 num_online_nodes());
3867 const int cv_start = node_idx * ibdev->num_comp_vectors /
3868 num_online_nodes();
3869 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3870 num_online_nodes();
3873 for_each_online_cpu(cpu) {
3874 if (cpu_to_node(cpu) != node)
3875 continue;
3876 if (ch_start + cpu_idx >= ch_end)
3877 break;
3878 ch = &target->ch[ch_start + cpu_idx];
3879 ch->target = target;
3880 ch->comp_vector = cv_start == cv_end ? cv_start :
3881 cv_start + cpu_idx % (cv_end - cv_start);
3882 spin_lock_init(&ch->lock);
3883 INIT_LIST_HEAD(&ch->free_tx);
3884 ret = srp_new_cm_id(ch);
3885 if (ret)
3886 goto err_disconnect;
3888 ret = srp_create_ch_ib(ch);
3889 if (ret)
3890 goto err_disconnect;
3892 ret = srp_alloc_req_data(ch);
3893 if (ret)
3894 goto err_disconnect;
3896 ret = srp_connect_ch(ch, multich);
3897 if (ret) {
3898 char dst[64];
3900 if (target->using_rdma_cm)
3901 snprintf(dst, sizeof(dst), "%pIS",
3902 &target->rdma_cm.dst);
3904 snprintf(dst, sizeof(dst), "%pI6",
3905 target->ib_cm.orig_dgid.raw);
3906 shost_printk(KERN_ERR, target->scsi_host,
3907 PFX "Connection %d/%d to %s failed\n",
3908 ch_start + cpu_idx,
3909 target->ch_count, dst);
3910 if (node_idx == 0 && cpu_idx == 0) {
3913 srp_free_ch_ib(target, ch);
3914 srp_free_req_data(target, ch);
3915 target->ch_count = ch - target->ch;
3927 target->scsi_host->nr_hw_queues = target->ch_count;
3929 ret = srp_add_target(host, target);
3930 if (ret)
3931 goto err_disconnect;
3933 if (target->state != SRP_TARGET_REMOVED) {
3934 if (target->using_rdma_cm) {
3935 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3936 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3937 be64_to_cpu(target->id_ext),
3938 be64_to_cpu(target->ioc_guid),
3939 target->sgid.raw, &target->rdma_cm.dst);
3941 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3942 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3943 be64_to_cpu(target->id_ext),
3944 be64_to_cpu(target->ioc_guid),
3945 be16_to_cpu(target->ib_cm.pkey),
3946 be64_to_cpu(target->ib_cm.service_id),
3948 target->ib_cm.orig_dgid.raw);
3951 ret = count;
3953 out:
3955 mutex_unlock(&host->add_target_mutex);
3958 scsi_host_put(target->scsi_host);
3961 * If a call to srp_remove_target() has not been scheduled,
3962 * drop the network namespace reference now that was obtained
3963 * earlier in this function.
3965 if (target->state != SRP_TARGET_REMOVED)
3966 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
3967 scsi_host_put(target->scsi_host);
3969 return ret;
3972 err_disconnect:
3973 srp_disconnect_target(target);
3976 for (i = 0; i < target->ch_count; i++) {
3977 ch = &target->ch[i];
3978 srp_free_ch_ib(target, ch);
3979 srp_free_req_data(target, ch);
3980 }
3982 kfree(target->ch);
3983 target->ch = NULL;
3985 goto out;
3986 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3988 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3991 struct srp_host *host = container_of(dev, struct srp_host, dev);
3993 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3996 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3998 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
4001 struct srp_host *host = container_of(dev, struct srp_host, dev);
4003 return sprintf(buf, "%d\n", host->port);
4006 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4008 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4010 struct srp_host *host;
4012 host = kzalloc(sizeof *host, GFP_KERNEL);
4013 if (!host)
4014 return NULL;
4016 INIT_LIST_HEAD(&host->target_list);
4017 spin_lock_init(&host->target_lock);
4018 init_completion(&host->released);
4019 mutex_init(&host->add_target_mutex);
4020 host->srp_dev = device;
4021 host->port = port;
4023 host->dev.class = &srp_class;
4024 host->dev.parent = device->dev->dev.parent;
4025 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
4027 if (device_register(&host->dev))
4028 goto free_host;
4029 if (device_create_file(&host->dev, &dev_attr_add_target))
4030 goto err_class;
4031 if (device_create_file(&host->dev, &dev_attr_ibdev))
4032 goto err_class;
4033 if (device_create_file(&host->dev, &dev_attr_port))
4034 goto err_class;
4036 return host;
4038 err_class:
4039 device_unregister(&host->dev);
4041 free_host:
4042 kfree(host);
4044 return NULL;
4047 static void srp_add_one(struct ib_device *device)
4049 struct srp_device *srp_dev;
4050 struct ib_device_attr *attr = &device->attrs;
4051 struct srp_host *host;
4052 int mr_page_shift, p;
4053 u64 max_pages_per_mr;
4054 unsigned int flags = 0;
4056 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4057 if (!srp_dev)
4058 return;
4061 * Use the smallest page size supported by the HCA, down to a
4062 * minimum of 4096 bytes. We're unlikely to build large sglists
4063 * out of smaller entries.
4065 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
4066 srp_dev->mr_page_size = 1 << mr_page_shift;
4067 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
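/*
 * Example: page_size_cap = 0xfffff000 has its least significant set bit
 * at position 13, so mr_page_shift = max(12, 13 - 1) = 12, mr_page_size
 * = 4096 and mr_page_mask = ~0xfffULL.
 */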
4068 max_pages_per_mr = attr->max_mr_size;
4069 do_div(max_pages_per_mr, srp_dev->mr_page_size);
4070 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4071 attr->max_mr_size, srp_dev->mr_page_size,
4072 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4073 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4074 max_pages_per_mr);
4076 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
4077 device->map_phys_fmr && device->unmap_fmr);
4078 srp_dev->has_fr = (attr->device_cap_flags &
4079 IB_DEVICE_MEM_MGT_EXTENSIONS);
4080 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4081 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4082 } else if (!never_register &&
4083 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4084 srp_dev->use_fast_reg = (srp_dev->has_fr &&
4085 (!srp_dev->has_fmr || prefer_fr));
4086 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
4089 if (never_register || !register_always ||
4090 (!srp_dev->has_fmr && !srp_dev->has_fr))
4091 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
4093 if (srp_dev->use_fast_reg) {
4094 srp_dev->max_pages_per_mr =
4095 min_t(u32, srp_dev->max_pages_per_mr,
4096 attr->max_fast_reg_page_list_len);
4098 srp_dev->mr_max_size = srp_dev->mr_page_size *
4099 srp_dev->max_pages_per_mr;
4100 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4101 device->name, mr_page_shift, attr->max_mr_size,
4102 attr->max_fast_reg_page_list_len,
4103 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4105 INIT_LIST_HEAD(&srp_dev->dev_list);
4107 srp_dev->dev = device;
4108 srp_dev->pd = ib_alloc_pd(device, flags);
4109 if (IS_ERR(srp_dev->pd))
4110 goto free_dev;
4112 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4113 srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4114 WARN_ON_ONCE(srp_dev->global_rkey == 0);
4117 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
4118 host = srp_add_port(srp_dev, p);
4119 if (host)
4120 list_add_tail(&host->list, &srp_dev->dev_list);
4123 ib_set_client_data(device, &srp_client, srp_dev);
4130 static void srp_remove_one(struct ib_device *device, void *client_data)
4132 struct srp_device *srp_dev;
4133 struct srp_host *host, *tmp_host;
4134 struct srp_target_port *target;
4136 srp_dev = client_data;
4137 if (!srp_dev)
4138 return;
4140 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4141 device_unregister(&host->dev);
4143 * Wait for the sysfs entry to go away, so that no new
4144 * target ports can be created.
4146 wait_for_completion(&host->released);
4149 * Remove all target ports.
4151 spin_lock(&host->target_lock);
4152 list_for_each_entry(target, &host->target_list, list)
4153 srp_queue_remove_work(target);
4154 spin_unlock(&host->target_lock);
4157 * srp_queue_remove_work() queues a call to
4158 * srp_remove_target(). The latter function cancels
4159 * target->tl_err_work so waiting for the remove works to
4160 * finish is sufficient.
4162 flush_workqueue(srp_remove_wq);
4164 kfree(host);
4165 }
4167 ib_dealloc_pd(srp_dev->pd);
4169 kfree(srp_dev);
4172 static struct srp_function_template ib_srp_transport_functions = {
4173 .has_rport_state = true,
4174 .reset_timer_if_blocked = true,
4175 .reconnect_delay = &srp_reconnect_delay,
4176 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
4177 .dev_loss_tmo = &srp_dev_loss_tmo,
4178 .reconnect = srp_rport_reconnect,
4179 .rport_delete = srp_rport_delete,
4180 .terminate_rport_io = srp_terminate_io,
4183 static int __init srp_init_module(void)
4185 int ret;
4187 if (srp_sg_tablesize) {
4188 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4189 if (!cmd_sg_entries)
4190 cmd_sg_entries = srp_sg_tablesize;
4193 if (!cmd_sg_entries)
4194 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4196 if (cmd_sg_entries > 255) {
4197 pr_warn("Clamping cmd_sg_entries to 255\n");
4198 cmd_sg_entries = 255;
4201 if (!indirect_sg_entries)
4202 indirect_sg_entries = cmd_sg_entries;
4203 else if (indirect_sg_entries < cmd_sg_entries) {
4204 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4206 indirect_sg_entries = cmd_sg_entries;
4209 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4210 pr_warn("Clamping indirect_sg_entries to %u\n",
4212 indirect_sg_entries = SG_MAX_SEGMENTS;
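/*
 * Example of the clamping above: loading the module with
 * cmd_sg_entries=300 warns and clamps the value to 255; a still unset
 * indirect_sg_entries is then raised to 255 as well.
 */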
4215 srp_remove_wq = create_workqueue("srp_remove");
4216 if (!srp_remove_wq) {
4217 ret = -ENOMEM;
4218 goto out;
4219 }
4221 ret = 0;
4222 ib_srp_transport_template =
4223 srp_attach_transport(&ib_srp_transport_functions);
4224 if (!ib_srp_transport_template)
4225 goto destroy_wq;
4227 ret = class_register(&srp_class);
4228 if (ret) {
4229 pr_err("couldn't register class infiniband_srp\n");
4230 goto release_tr;
4231 }
4233 ib_sa_register_client(&srp_sa_client);
4235 ret = ib_register_client(&srp_client);
4236 if (ret) {
4237 pr_err("couldn't register IB client\n");
4238 goto unreg_sa;
4239 }
4241 return 0;
4244 unreg_sa:
4245 ib_sa_unregister_client(&srp_sa_client);
4246 class_unregister(&srp_class);
4248 release_tr:
4249 srp_release_transport(ib_srp_transport_template);
4251 destroy_wq:
4252 destroy_workqueue(srp_remove_wq);
4253 out:
4254 return ret;
4256 static void __exit srp_cleanup_module(void)
4258 ib_unregister_client(&srp_client);
4259 ib_sa_unregister_client(&srp_sa_client);
4260 class_unregister(&srp_class);
4261 srp_release_transport(ib_srp_transport_template);
4262 destroy_workqueue(srp_remove_wq);
4265 module_init(srp_init_module);
4266 module_exit(srp_cleanup_module);