GNU Linux-libre 5.19.9-gnu
[releases.git] / drivers / scsi / xen-scsifront.c
1 /*
2  * Xen SCSI frontend driver
3  *
4  * Copyright (c) 2008, FUJITSU Limited
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License version 2
8  * as published by the Free Software Foundation; or, when distributed
9  * separately from the Linux kernel or incorporated into other
10  * software packages, subject to the following license:
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this source file (the "Software"), to deal in the Software without
14  * restriction, including without limitation the rights to use, copy, modify,
15  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
16  * and to permit persons to whom the Software is furnished to do so, subject to
17  * the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in
20  * all copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28  * IN THE SOFTWARE.
29  */
30
31 #include <linux/module.h>
32 #include <linux/kernel.h>
33 #include <linux/device.h>
34 #include <linux/wait.h>
35 #include <linux/interrupt.h>
36 #include <linux/mutex.h>
37 #include <linux/spinlock.h>
38 #include <linux/sched.h>
39 #include <linux/blkdev.h>
40 #include <linux/pfn.h>
41 #include <linux/slab.h>
42 #include <linux/bitops.h>
43
44 #include <scsi/scsi_cmnd.h>
45 #include <scsi/scsi_device.h>
46 #include <scsi/scsi.h>
47 #include <scsi/scsi_host.h>
48
49 #include <xen/xen.h>
50 #include <xen/xenbus.h>
51 #include <xen/grant_table.h>
52 #include <xen/events.h>
53 #include <xen/page.h>
54
55 #include <xen/interface/grant_table.h>
56 #include <xen/interface/io/vscsiif.h>
57 #include <xen/interface/io/protocols.h>
58
59 #include <asm/xen/hypervisor.h>
60
/* LUN operation codes; the code using them lies outside this section. */
#define VSCSIFRONT_OP_ADD_LUN	1
#define VSCSIFRONT_OP_DEL_LUN	2
#define VSCSIFRONT_OP_READD_LUN	3

/* Tunables: default queue depth per LUN and the host addressing limits. */
#define VSCSIIF_DEFAULT_CMD_PER_LUN	10
#define VSCSIIF_MAX_TARGET		64
#define VSCSIIF_MAX_LUN			255

/* One request id (shadow slot) exists per entry of the one-page ring. */
#define VSCSIIF_RING_SIZE	__CONST_RING_SIZE(vscsiif, PAGE_SIZE)
#define VSCSIIF_MAX_REQS	VSCSIIF_RING_SIZE

/* Number of pages needed to hold nr_sg segment descriptors. */
#define vscsiif_grants_sg(nr_sg)					\
	(PFN_UP((nr_sg) * sizeof(struct scsiif_request_segment)))
75
76 struct vscsifrnt_shadow {
77         /* command between backend and frontend */
78         unsigned char act;
79         uint8_t nr_segments;
80         uint16_t rqid;
81         uint16_t ref_rqid;
82
83         bool inflight;
84
85         unsigned int nr_grants;         /* number of grants in gref[] */
86         struct scsiif_request_segment *sg;      /* scatter/gather elements */
87         struct scsiif_request_segment seg[VSCSIIF_SG_TABLESIZE];
88
89         /* Do reset or abort function. */
90         wait_queue_head_t wq_reset;     /* reset work queue           */
91         int wait_reset;                 /* reset work queue condition */
92         int32_t rslt_reset;             /* reset response status:     */
93                                         /* SUCCESS or FAILED or:      */
94 #define RSLT_RESET_WAITING      0
95 #define RSLT_RESET_ERR          -1
96
97         /* Requested struct scsi_cmnd is stored from kernel. */
98         struct scsi_cmnd *sc;
99         int gref[vscsiif_grants_sg(SG_ALL) + SG_ALL];
100 };
101
102 struct vscsifrnt_info {
103         struct xenbus_device *dev;
104
105         struct Scsi_Host *host;
106         enum {
107                 STATE_INACTIVE,
108                 STATE_ACTIVE,
109                 STATE_ERROR
110         }  host_active;
111
112         unsigned int evtchn;
113         unsigned int irq;
114
115         grant_ref_t ring_ref;
116         struct vscsiif_front_ring ring;
117         struct vscsiif_response ring_rsp;
118
119         spinlock_t shadow_lock;
120         DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
121         struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
122
123         /* Following items are protected by the host lock. */
124         wait_queue_head_t wq_sync;
125         wait_queue_head_t wq_pause;
126         unsigned int wait_ring_available:1;
127         unsigned int waiting_pause:1;
128         unsigned int pause:1;
129         unsigned callers;
130
131         char dev_state_path[64];
132         struct task_struct *curr;
133 };
134
/* Driver-wide mutex; its users lie outside this section of the file. */
static DEFINE_MUTEX(scsifront_mutex);
136
137 static void scsifront_wake_up(struct vscsifrnt_info *info)
138 {
139         info->wait_ring_available = 0;
140         wake_up(&info->wq_sync);
141 }
142
143 static int scsifront_get_rqid(struct vscsifrnt_info *info)
144 {
145         unsigned long flags;
146         int free;
147
148         spin_lock_irqsave(&info->shadow_lock, flags);
149
150         free = find_first_bit(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
151         __clear_bit(free, info->shadow_free_bitmap);
152
153         spin_unlock_irqrestore(&info->shadow_lock, flags);
154
155         return free;
156 }
157
158 static int _scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
159 {
160         int empty = bitmap_empty(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
161
162         __set_bit(id, info->shadow_free_bitmap);
163         info->shadow[id] = NULL;
164
165         return empty || info->wait_ring_available;
166 }
167
168 static void scsifront_put_rqid(struct vscsifrnt_info *info, uint32_t id)
169 {
170         unsigned long flags;
171         int kick;
172
173         spin_lock_irqsave(&info->shadow_lock, flags);
174         kick = _scsifront_put_rqid(info, id);
175         spin_unlock_irqrestore(&info->shadow_lock, flags);
176
177         if (kick)
178                 scsifront_wake_up(info);
179 }
180
181 static int scsifront_do_request(struct vscsifrnt_info *info,
182                                 struct vscsifrnt_shadow *shadow)
183 {
184         struct vscsiif_front_ring *ring = &(info->ring);
185         struct vscsiif_request *ring_req;
186         struct scsi_cmnd *sc = shadow->sc;
187         uint32_t id;
188         int i, notify;
189
190         if (RING_FULL(&info->ring))
191                 return -EBUSY;
192
193         id = scsifront_get_rqid(info);  /* use id in response */
194         if (id >= VSCSIIF_MAX_REQS)
195                 return -EBUSY;
196
197         info->shadow[id] = shadow;
198         shadow->rqid = id;
199
200         ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
201         ring->req_prod_pvt++;
202
203         ring_req->rqid        = id;
204         ring_req->act         = shadow->act;
205         ring_req->ref_rqid    = shadow->ref_rqid;
206         ring_req->nr_segments = shadow->nr_segments;
207
208         ring_req->id      = sc->device->id;
209         ring_req->lun     = sc->device->lun;
210         ring_req->channel = sc->device->channel;
211         ring_req->cmd_len = sc->cmd_len;
212
213         BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
214
215         memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
216
217         ring_req->sc_data_direction   = (uint8_t)sc->sc_data_direction;
218         ring_req->timeout_per_command = scsi_cmd_to_rq(sc)->timeout / HZ;
219
220         for (i = 0; i < (shadow->nr_segments & ~VSCSIIF_SG_GRANT); i++)
221                 ring_req->seg[i] = shadow->seg[i];
222
223         shadow->inflight = true;
224
225         RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
226         if (notify)
227                 notify_remote_via_irq(info->irq);
228
229         return 0;
230 }
231
232 static void scsifront_set_error(struct vscsifrnt_info *info, const char *msg)
233 {
234         shost_printk(KERN_ERR, info->host, KBUILD_MODNAME "%s\n"
235                      "Disabling device for further use\n", msg);
236         info->host_active = STATE_ERROR;
237 }
238
239 static void scsifront_gnttab_done(struct vscsifrnt_info *info,
240                                   struct vscsifrnt_shadow *shadow)
241 {
242         int i;
243
244         if (shadow->sc->sc_data_direction == DMA_NONE)
245                 return;
246
247         for (i = 0; i < shadow->nr_grants; i++) {
248                 if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) {
249                         scsifront_set_error(info, "grant still in use by backend");
250                         return;
251                 }
252         }
253
254         kfree(shadow->sg);
255 }
256
257 static unsigned int scsifront_host_byte(int32_t rslt)
258 {
259         switch (XEN_VSCSIIF_RSLT_HOST(rslt)) {
260         case XEN_VSCSIIF_RSLT_HOST_OK:
261                 return DID_OK;
262         case XEN_VSCSIIF_RSLT_HOST_NO_CONNECT:
263                 return DID_NO_CONNECT;
264         case XEN_VSCSIIF_RSLT_HOST_BUS_BUSY:
265                 return DID_BUS_BUSY;
266         case XEN_VSCSIIF_RSLT_HOST_TIME_OUT:
267                 return DID_TIME_OUT;
268         case XEN_VSCSIIF_RSLT_HOST_BAD_TARGET:
269                 return DID_BAD_TARGET;
270         case XEN_VSCSIIF_RSLT_HOST_ABORT:
271                 return DID_ABORT;
272         case XEN_VSCSIIF_RSLT_HOST_PARITY:
273                 return DID_PARITY;
274         case XEN_VSCSIIF_RSLT_HOST_ERROR:
275                 return DID_ERROR;
276         case XEN_VSCSIIF_RSLT_HOST_RESET:
277                 return DID_RESET;
278         case XEN_VSCSIIF_RSLT_HOST_BAD_INTR:
279                 return DID_BAD_INTR;
280         case XEN_VSCSIIF_RSLT_HOST_PASSTHROUGH:
281                 return DID_PASSTHROUGH;
282         case XEN_VSCSIIF_RSLT_HOST_SOFT_ERROR:
283                 return DID_SOFT_ERROR;
284         case XEN_VSCSIIF_RSLT_HOST_IMM_RETRY:
285                 return DID_IMM_RETRY;
286         case XEN_VSCSIIF_RSLT_HOST_REQUEUE:
287                 return DID_REQUEUE;
288         case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_DISRUPTED:
289                 return DID_TRANSPORT_DISRUPTED;
290         case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_FAILFAST:
291                 return DID_TRANSPORT_FAILFAST;
292         case XEN_VSCSIIF_RSLT_HOST_TARGET_FAILURE:
293                 return DID_TARGET_FAILURE;
294         case XEN_VSCSIIF_RSLT_HOST_NEXUS_FAILURE:
295                 return DID_NEXUS_FAILURE;
296         case XEN_VSCSIIF_RSLT_HOST_ALLOC_FAILURE:
297                 return DID_ALLOC_FAILURE;
298         case XEN_VSCSIIF_RSLT_HOST_MEDIUM_ERROR:
299                 return DID_MEDIUM_ERROR;
300         case XEN_VSCSIIF_RSLT_HOST_TRANSPORT_MARGINAL:
301                 return DID_TRANSPORT_MARGINAL;
302         default:
303                 return DID_ERROR;
304         }
305 }
306
307 static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
308                                    struct vscsiif_response *ring_rsp)
309 {
310         struct vscsifrnt_shadow *shadow;
311         struct scsi_cmnd *sc;
312         uint32_t id;
313         uint8_t sense_len;
314
315         id = ring_rsp->rqid;
316         shadow = info->shadow[id];
317         sc = shadow->sc;
318
319         BUG_ON(sc == NULL);
320
321         scsifront_gnttab_done(info, shadow);
322         if (info->host_active == STATE_ERROR)
323                 return;
324         scsifront_put_rqid(info, id);
325
326         set_host_byte(sc, scsifront_host_byte(ring_rsp->rslt));
327         set_status_byte(sc, XEN_VSCSIIF_RSLT_STATUS(ring_rsp->rslt));
328         scsi_set_resid(sc, ring_rsp->residual_len);
329
330         sense_len = min_t(uint8_t, VSCSIIF_SENSE_BUFFERSIZE,
331                           ring_rsp->sense_len);
332
333         if (sense_len)
334                 memcpy(sc->sense_buffer, ring_rsp->sense_buffer, sense_len);
335
336         scsi_done(sc);
337 }
338
339 static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
340                                     struct vscsiif_response *ring_rsp)
341 {
342         uint16_t id = ring_rsp->rqid;
343         unsigned long flags;
344         struct vscsifrnt_shadow *shadow = info->shadow[id];
345         int kick;
346
347         spin_lock_irqsave(&info->shadow_lock, flags);
348         shadow->wait_reset = 1;
349         switch (shadow->rslt_reset) {
350         case RSLT_RESET_WAITING:
351                 if (ring_rsp->rslt == XEN_VSCSIIF_RSLT_RESET_SUCCESS)
352                         shadow->rslt_reset = SUCCESS;
353                 else
354                         shadow->rslt_reset = FAILED;
355                 break;
356         case RSLT_RESET_ERR:
357                 kick = _scsifront_put_rqid(info, id);
358                 spin_unlock_irqrestore(&info->shadow_lock, flags);
359                 kfree(shadow);
360                 if (kick)
361                         scsifront_wake_up(info);
362                 return;
363         default:
364                 scsifront_set_error(info, "bad reset state");
365                 break;
366         }
367         spin_unlock_irqrestore(&info->shadow_lock, flags);
368
369         wake_up(&shadow->wq_reset);
370 }
371
372 static void scsifront_do_response(struct vscsifrnt_info *info,
373                                   struct vscsiif_response *ring_rsp)
374 {
375         struct vscsifrnt_shadow *shadow;
376
377         if (ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
378             !info->shadow[ring_rsp->rqid]->inflight) {
379                 scsifront_set_error(info, "illegal rqid returned by backend!");
380                 return;
381         }
382         shadow = info->shadow[ring_rsp->rqid];
383         shadow->inflight = false;
384
385         if (shadow->act == VSCSIIF_ACT_SCSI_CDB)
386                 scsifront_cdb_cmd_done(info, ring_rsp);
387         else
388                 scsifront_sync_cmd_done(info, ring_rsp);
389 }
390
391 static int scsifront_ring_drain(struct vscsifrnt_info *info,
392                                 unsigned int *eoiflag)
393 {
394         struct vscsiif_response ring_rsp;
395         RING_IDX i, rp;
396         int more_to_do = 0;
397
398         rp = READ_ONCE(info->ring.sring->rsp_prod);
399         virt_rmb();     /* ordering required respective to backend */
400         if (RING_RESPONSE_PROD_OVERFLOW(&info->ring, rp)) {
401                 scsifront_set_error(info, "illegal number of responses");
402                 return 0;
403         }
404         for (i = info->ring.rsp_cons; i != rp; i++) {
405                 RING_COPY_RESPONSE(&info->ring, i, &ring_rsp);
406                 scsifront_do_response(info, &ring_rsp);
407                 if (info->host_active == STATE_ERROR)
408                         return 0;
409                 *eoiflag &= ~XEN_EOI_FLAG_SPURIOUS;
410         }
411
412         info->ring.rsp_cons = i;
413
414         if (i != info->ring.req_prod_pvt)
415                 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
416         else
417                 info->ring.sring->rsp_event = i + 1;
418
419         return more_to_do;
420 }
421
422 static int scsifront_cmd_done(struct vscsifrnt_info *info,
423                               unsigned int *eoiflag)
424 {
425         int more_to_do;
426         unsigned long flags;
427
428         spin_lock_irqsave(info->host->host_lock, flags);
429
430         more_to_do = scsifront_ring_drain(info, eoiflag);
431
432         info->wait_ring_available = 0;
433
434         spin_unlock_irqrestore(info->host->host_lock, flags);
435
436         wake_up(&info->wq_sync);
437
438         return more_to_do;
439 }
440
441 static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
442 {
443         struct vscsifrnt_info *info = dev_id;
444         unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
445
446         if (info->host_active == STATE_ERROR) {
447                 xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
448                 return IRQ_HANDLED;
449         }
450
451         while (scsifront_cmd_done(info, &eoiflag))
452                 /* Yield point for this unbounded loop. */
453                 cond_resched();
454
455         xen_irq_lateeoi(irq, eoiflag);
456
457         return IRQ_HANDLED;
458 }
459
460 static void scsifront_finish_all(struct vscsifrnt_info *info)
461 {
462         unsigned int i, dummy;
463         struct vscsiif_response resp;
464
465         scsifront_ring_drain(info, &dummy);
466
467         for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
468                 if (test_bit(i, info->shadow_free_bitmap))
469                         continue;
470                 resp.rqid = i;
471                 resp.sense_len = 0;
472                 resp.rslt = DID_RESET << 16;
473                 resp.residual_len = 0;
474                 scsifront_do_response(info, &resp);
475         }
476 }
477
478 static int map_data_for_request(struct vscsifrnt_info *info,
479                                 struct scsi_cmnd *sc,
480                                 struct vscsifrnt_shadow *shadow)
481 {
482         grant_ref_t gref_head;
483         struct page *page;
484         int err, ref, ref_cnt = 0;
485         int grant_ro = (sc->sc_data_direction == DMA_TO_DEVICE);
486         unsigned int i, off, len, bytes;
487         unsigned int data_len = scsi_bufflen(sc);
488         unsigned int data_grants = 0, seg_grants = 0;
489         struct scatterlist *sg;
490         struct scsiif_request_segment *seg;
491
492         if (sc->sc_data_direction == DMA_NONE || !data_len)
493                 return 0;
494
495         scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i)
496                 data_grants += PFN_UP(sg->offset + sg->length);
497
498         if (data_grants > VSCSIIF_SG_TABLESIZE) {
499                 if (data_grants > info->host->sg_tablesize) {
500                         shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
501                              "Unable to map request_buffer for command!\n");
502                         return -E2BIG;
503                 }
504                 seg_grants = vscsiif_grants_sg(data_grants);
505                 shadow->sg = kcalloc(data_grants,
506                         sizeof(struct scsiif_request_segment), GFP_ATOMIC);
507                 if (!shadow->sg)
508                         return -ENOMEM;
509         }
510         seg = shadow->sg ? : shadow->seg;
511
512         err = gnttab_alloc_grant_references(seg_grants + data_grants,
513                                             &gref_head);
514         if (err) {
515                 kfree(shadow->sg);
516                 shost_printk(KERN_ERR, info->host, KBUILD_MODNAME
517                              "gnttab_alloc_grant_references() error\n");
518                 return -ENOMEM;
519         }
520
521         if (seg_grants) {
522                 page = virt_to_page(seg);
523                 off = offset_in_page(seg);
524                 len = sizeof(struct scsiif_request_segment) * data_grants;
525                 while (len > 0) {
526                         bytes = min_t(unsigned int, len, PAGE_SIZE - off);
527
528                         ref = gnttab_claim_grant_reference(&gref_head);
529                         BUG_ON(ref == -ENOSPC);
530
531                         gnttab_grant_foreign_access_ref(ref,
532                                 info->dev->otherend_id,
533                                 xen_page_to_gfn(page), 1);
534                         shadow->gref[ref_cnt] = ref;
535                         shadow->seg[ref_cnt].gref   = ref;
536                         shadow->seg[ref_cnt].offset = (uint16_t)off;
537                         shadow->seg[ref_cnt].length = (uint16_t)bytes;
538
539                         page++;
540                         len -= bytes;
541                         off = 0;
542                         ref_cnt++;
543                 }
544                 BUG_ON(seg_grants < ref_cnt);
545                 seg_grants = ref_cnt;
546         }
547
548         scsi_for_each_sg(sc, sg, scsi_sg_count(sc), i) {
549                 page = sg_page(sg);
550                 off = sg->offset;
551                 len = sg->length;
552
553                 while (len > 0 && data_len > 0) {
554                         /*
555                          * sg sends a scatterlist that is larger than
556                          * the data_len it wants transferred for certain
557                          * IO sizes.
558                          */
559                         bytes = min_t(unsigned int, len, PAGE_SIZE - off);
560                         bytes = min(bytes, data_len);
561
562                         ref = gnttab_claim_grant_reference(&gref_head);
563                         BUG_ON(ref == -ENOSPC);
564
565                         gnttab_grant_foreign_access_ref(ref,
566                                 info->dev->otherend_id,
567                                 xen_page_to_gfn(page),
568                                 grant_ro);
569
570                         shadow->gref[ref_cnt] = ref;
571                         seg->gref   = ref;
572                         seg->offset = (uint16_t)off;
573                         seg->length = (uint16_t)bytes;
574
575                         page++;
576                         seg++;
577                         len -= bytes;
578                         data_len -= bytes;
579                         off = 0;
580                         ref_cnt++;
581                 }
582         }
583
584         if (seg_grants)
585                 shadow->nr_segments = VSCSIIF_SG_GRANT | seg_grants;
586         else
587                 shadow->nr_segments = (uint8_t)ref_cnt;
588         shadow->nr_grants = ref_cnt;
589
590         return 0;
591 }
592
593 static int scsifront_enter(struct vscsifrnt_info *info)
594 {
595         if (info->pause)
596                 return 1;
597         info->callers++;
598         return 0;
599 }
600
601 static void scsifront_return(struct vscsifrnt_info *info)
602 {
603         info->callers--;
604         if (info->callers)
605                 return;
606
607         if (!info->waiting_pause)
608                 return;
609
610         info->waiting_pause = 0;
611         wake_up(&info->wq_pause);
612 }
613
614 static int scsifront_queuecommand(struct Scsi_Host *shost,
615                                   struct scsi_cmnd *sc)
616 {
617         struct vscsifrnt_info *info = shost_priv(shost);
618         struct vscsifrnt_shadow *shadow = scsi_cmd_priv(sc);
619         unsigned long flags;
620         int err;
621
622         if (info->host_active == STATE_ERROR)
623                 return SCSI_MLQUEUE_HOST_BUSY;
624
625         sc->result = 0;
626
627         shadow->sc  = sc;
628         shadow->act = VSCSIIF_ACT_SCSI_CDB;
629
630         spin_lock_irqsave(shost->host_lock, flags);
631         if (scsifront_enter(info)) {
632                 spin_unlock_irqrestore(shost->host_lock, flags);
633                 return SCSI_MLQUEUE_HOST_BUSY;
634         }
635
636         err = map_data_for_request(info, sc, shadow);
637         if (err < 0) {
638                 pr_debug("%s: err %d\n", __func__, err);
639                 scsifront_return(info);
640                 spin_unlock_irqrestore(shost->host_lock, flags);
641                 if (err == -ENOMEM)
642                         return SCSI_MLQUEUE_HOST_BUSY;
643                 sc->result = DID_ERROR << 16;
644                 scsi_done(sc);
645                 return 0;
646         }
647
648         if (scsifront_do_request(info, shadow)) {
649                 scsifront_gnttab_done(info, shadow);
650                 goto busy;
651         }
652
653         scsifront_return(info);
654         spin_unlock_irqrestore(shost->host_lock, flags);
655
656         return 0;
657
658 busy:
659         scsifront_return(info);
660         spin_unlock_irqrestore(shost->host_lock, flags);
661         pr_debug("%s: busy\n", __func__);
662         return SCSI_MLQUEUE_HOST_BUSY;
663 }
664
665 /*
666  * Any exception handling (reset or abort) must be forwarded to the backend.
667  * We have to wait until an answer is returned. This answer contains the
668  * result to be returned to the requestor.
669  */
670 static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
671 {
672         struct Scsi_Host *host = sc->device->host;
673         struct vscsifrnt_info *info = shost_priv(host);
674         struct vscsifrnt_shadow *shadow, *s = scsi_cmd_priv(sc);
675         int err = 0;
676
677         if (info->host_active == STATE_ERROR)
678                 return FAILED;
679
680         shadow = kzalloc(sizeof(*shadow), GFP_NOIO);
681         if (!shadow)
682                 return FAILED;
683
684         shadow->act = act;
685         shadow->rslt_reset = RSLT_RESET_WAITING;
686         shadow->sc = sc;
687         shadow->ref_rqid = s->rqid;
688         init_waitqueue_head(&shadow->wq_reset);
689
690         spin_lock_irq(host->host_lock);
691
692         for (;;) {
693                 if (scsifront_enter(info))
694                         goto fail;
695
696                 if (!scsifront_do_request(info, shadow))
697                         break;
698
699                 scsifront_return(info);
700                 if (err)
701                         goto fail;
702                 info->wait_ring_available = 1;
703                 spin_unlock_irq(host->host_lock);
704                 err = wait_event_interruptible(info->wq_sync,
705                                                !info->wait_ring_available);
706                 spin_lock_irq(host->host_lock);
707         }
708
709         spin_unlock_irq(host->host_lock);
710         err = wait_event_interruptible(shadow->wq_reset, shadow->wait_reset);
711         spin_lock_irq(host->host_lock);
712
713         if (!err) {
714                 err = shadow->rslt_reset;
715                 scsifront_put_rqid(info, shadow->rqid);
716                 kfree(shadow);
717         } else {
718                 spin_lock(&info->shadow_lock);
719                 shadow->rslt_reset = RSLT_RESET_ERR;
720                 spin_unlock(&info->shadow_lock);
721                 err = FAILED;
722         }
723
724         scsifront_return(info);
725         spin_unlock_irq(host->host_lock);
726         return err;
727
728 fail:
729         spin_unlock_irq(host->host_lock);
730         kfree(shadow);
731         return FAILED;
732 }
733
734 static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
735 {
736         pr_debug("%s\n", __func__);
737         return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_ABORT);
738 }
739
740 static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
741 {
742         pr_debug("%s\n", __func__);
743         return scsifront_action_handler(sc, VSCSIIF_ACT_SCSI_RESET);
744 }
745
746 static int scsifront_sdev_configure(struct scsi_device *sdev)
747 {
748         struct vscsifrnt_info *info = shost_priv(sdev->host);
749         int err;
750
751         if (info->host_active == STATE_ERROR)
752                 return -EIO;
753
754         if (info && current == info->curr) {
755                 err = xenbus_printf(XBT_NIL, info->dev->nodename,
756                               info->dev_state_path, "%d", XenbusStateConnected);
757                 if (err) {
758                         xenbus_dev_error(info->dev, err,
759                                 "%s: writing dev_state_path", __func__);
760                         return err;
761                 }
762         }
763
764         return 0;
765 }
766
767 static void scsifront_sdev_destroy(struct scsi_device *sdev)
768 {
769         struct vscsifrnt_info *info = shost_priv(sdev->host);
770         int err;
771
772         if (info && current == info->curr) {
773                 err = xenbus_printf(XBT_NIL, info->dev->nodename,
774                               info->dev_state_path, "%d", XenbusStateClosed);
775                 if (err)
776                         xenbus_dev_error(info->dev, err,
777                                 "%s: writing dev_state_path", __func__);
778         }
779 }
780
781 static struct scsi_host_template scsifront_sht = {
782         .module                 = THIS_MODULE,
783         .name                   = "Xen SCSI frontend driver",
784         .queuecommand           = scsifront_queuecommand,
785         .eh_abort_handler       = scsifront_eh_abort_handler,
786         .eh_device_reset_handler = scsifront_dev_reset_handler,
787         .slave_configure        = scsifront_sdev_configure,
788         .slave_destroy          = scsifront_sdev_destroy,
789         .cmd_per_lun            = VSCSIIF_DEFAULT_CMD_PER_LUN,
790         .can_queue              = VSCSIIF_MAX_REQS,
791         .this_id                = -1,
792         .cmd_size               = sizeof(struct vscsifrnt_shadow),
793         .sg_tablesize           = VSCSIIF_SG_TABLESIZE,
794         .proc_name              = "scsifront",
795 };
796
797 static int scsifront_alloc_ring(struct vscsifrnt_info *info)
798 {
799         struct xenbus_device *dev = info->dev;
800         struct vscsiif_sring *sring;
801         int err;
802
803         /***** Frontend to Backend ring start *****/
804         err = xenbus_setup_ring(dev, GFP_KERNEL, (void **)&sring, 1,
805                                 &info->ring_ref);
806         if (err)
807                 return err;
808
809         XEN_FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
810
811         err = xenbus_alloc_evtchn(dev, &info->evtchn);
812         if (err) {
813                 xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
814                 goto free_gnttab;
815         }
816
817         err = bind_evtchn_to_irq_lateeoi(info->evtchn);
818         if (err <= 0) {
819                 xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
820                 goto free_gnttab;
821         }
822
823         info->irq = err;
824
825         err = request_threaded_irq(info->irq, NULL, scsifront_irq_fn,
826                                    IRQF_ONESHOT, "scsifront", info);
827         if (err) {
828                 xenbus_dev_fatal(dev, err, "request_threaded_irq");
829                 goto free_irq;
830         }
831
832         return 0;
833
834 /* free resource */
835 free_irq:
836         unbind_from_irqhandler(info->irq, info);
837 free_gnttab:
838         xenbus_teardown_ring((void **)&sring, 1, &info->ring_ref);
839
840         return err;
841 }
842
843 static void scsifront_free_ring(struct vscsifrnt_info *info)
844 {
845         unbind_from_irqhandler(info->irq, info);
846         xenbus_teardown_ring((void **)&info->ring.sring, 1, &info->ring_ref);
847 }
848
849 static int scsifront_init_ring(struct vscsifrnt_info *info)
850 {
851         struct xenbus_device *dev = info->dev;
852         struct xenbus_transaction xbt;
853         int err;
854
855         pr_debug("%s\n", __func__);
856
857         err = scsifront_alloc_ring(info);
858         if (err)
859                 return err;
860         pr_debug("%s: %u %u\n", __func__, info->ring_ref, info->evtchn);
861
862 again:
863         err = xenbus_transaction_start(&xbt);
864         if (err)
865                 xenbus_dev_fatal(dev, err, "starting transaction");
866
867         err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
868                             info->ring_ref);
869         if (err) {
870                 xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
871                 goto fail;
872         }
873
874         err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
875                             info->evtchn);
876
877         if (err) {
878                 xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
879                 goto fail;
880         }
881
882         err = xenbus_transaction_end(xbt, 0);
883         if (err) {
884                 if (err == -EAGAIN)
885                         goto again;
886                 xenbus_dev_fatal(dev, err, "completing transaction");
887                 goto free_sring;
888         }
889
890         return 0;
891
892 fail:
893         xenbus_transaction_end(xbt, 1);
894 free_sring:
895         scsifront_free_ring(info);
896
897         return err;
898 }
899
900
901 static int scsifront_probe(struct xenbus_device *dev,
902                            const struct xenbus_device_id *id)
903 {
904         struct vscsifrnt_info *info;
905         struct Scsi_Host *host;
906         int err = -ENOMEM;
907         char name[TASK_COMM_LEN];
908
909         host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
910         if (!host) {
911                 xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
912                 return err;
913         }
914         info = (struct vscsifrnt_info *)host->hostdata;
915
916         dev_set_drvdata(&dev->dev, info);
917         info->dev = dev;
918
919         bitmap_fill(info->shadow_free_bitmap, VSCSIIF_MAX_REQS);
920
921         err = scsifront_init_ring(info);
922         if (err) {
923                 scsi_host_put(host);
924                 return err;
925         }
926
927         init_waitqueue_head(&info->wq_sync);
928         init_waitqueue_head(&info->wq_pause);
929         spin_lock_init(&info->shadow_lock);
930
931         snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
932
933         host->max_id      = VSCSIIF_MAX_TARGET;
934         host->max_channel = 0;
935         host->max_lun     = VSCSIIF_MAX_LUN;
936         host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
937         host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
938
939         err = scsi_add_host(host, &dev->dev);
940         if (err) {
941                 dev_err(&dev->dev, "fail to add scsi host %d\n", err);
942                 goto free_sring;
943         }
944         info->host = host;
945         info->host_active = STATE_ACTIVE;
946
947         xenbus_switch_state(dev, XenbusStateInitialised);
948
949         return 0;
950
951 free_sring:
952         scsifront_free_ring(info);
953         scsi_host_put(host);
954         return err;
955 }
956
957 static int scsifront_resume(struct xenbus_device *dev)
958 {
959         struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
960         struct Scsi_Host *host = info->host;
961         int err;
962
963         spin_lock_irq(host->host_lock);
964
965         /* Finish all still pending commands. */
966         scsifront_finish_all(info);
967
968         spin_unlock_irq(host->host_lock);
969
970         /* Reconnect to dom0. */
971         scsifront_free_ring(info);
972         err = scsifront_init_ring(info);
973         if (err) {
974                 dev_err(&dev->dev, "fail to resume %d\n", err);
975                 scsi_host_put(host);
976                 return err;
977         }
978
979         xenbus_switch_state(dev, XenbusStateInitialised);
980
981         return 0;
982 }
983
984 static int scsifront_suspend(struct xenbus_device *dev)
985 {
986         struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
987         struct Scsi_Host *host = info->host;
988         int err = 0;
989
990         /* No new commands for the backend. */
991         spin_lock_irq(host->host_lock);
992         info->pause = 1;
993         while (info->callers && !err) {
994                 info->waiting_pause = 1;
995                 info->wait_ring_available = 0;
996                 spin_unlock_irq(host->host_lock);
997                 wake_up(&info->wq_sync);
998                 err = wait_event_interruptible(info->wq_pause,
999                                                !info->waiting_pause);
1000                 spin_lock_irq(host->host_lock);
1001         }
1002         spin_unlock_irq(host->host_lock);
1003         return err;
1004 }
1005
1006 static int scsifront_remove(struct xenbus_device *dev)
1007 {
1008         struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
1009
1010         pr_debug("%s: %s removed\n", __func__, dev->nodename);
1011
1012         mutex_lock(&scsifront_mutex);
1013         if (info->host_active != STATE_INACTIVE) {
1014                 /* Scsi_host not yet removed */
1015                 scsi_remove_host(info->host);
1016                 info->host_active = STATE_INACTIVE;
1017         }
1018         mutex_unlock(&scsifront_mutex);
1019
1020         scsifront_free_ring(info);
1021         scsi_host_put(info->host);
1022
1023         return 0;
1024 }
1025
1026 static void scsifront_disconnect(struct vscsifrnt_info *info)
1027 {
1028         struct xenbus_device *dev = info->dev;
1029         struct Scsi_Host *host = info->host;
1030
1031         pr_debug("%s: %s disconnect\n", __func__, dev->nodename);
1032
1033         /*
1034          * When this function is executed, all devices of
1035          * Frontend have been deleted.
1036          * Therefore, it need not block I/O before remove_host.
1037          */
1038
1039         mutex_lock(&scsifront_mutex);
1040         if (info->host_active != STATE_INACTIVE) {
1041                 scsi_remove_host(host);
1042                 info->host_active = STATE_INACTIVE;
1043         }
1044         mutex_unlock(&scsifront_mutex);
1045
1046         xenbus_frontend_closed(dev);
1047 }
1048
1049 static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
1050 {
1051         struct xenbus_device *dev = info->dev;
1052         int i, err = 0;
1053         char str[64];
1054         char **dir;
1055         unsigned int dir_n = 0;
1056         unsigned int device_state;
1057         unsigned int hst, chn, tgt, lun;
1058         struct scsi_device *sdev;
1059
1060         if (info->host_active == STATE_ERROR)
1061                 return;
1062
1063         dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
1064         if (IS_ERR(dir))
1065                 return;
1066
1067         /* mark current task as the one allowed to modify device states */
1068         BUG_ON(info->curr);
1069         info->curr = current;
1070
1071         for (i = 0; i < dir_n; i++) {
1072                 /* read status */
1073                 snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
1074                 err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
1075                                    &device_state);
1076                 if (XENBUS_EXIST_ERR(err))
1077                         continue;
1078
1079                 /* virtual SCSI device */
1080                 snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
1081                 err = xenbus_scanf(XBT_NIL, dev->otherend, str,
1082                                    "%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
1083                 if (XENBUS_EXIST_ERR(err))
1084                         continue;
1085
1086                 /*
1087                  * Front device state path, used in slave_configure called
1088                  * on successfull scsi_add_device, and in slave_destroy called
1089                  * on remove of a device.
1090                  */
1091                 snprintf(info->dev_state_path, sizeof(info->dev_state_path),
1092                          "vscsi-devs/%s/state", dir[i]);
1093
1094                 switch (op) {
1095                 case VSCSIFRONT_OP_ADD_LUN:
1096                         if (device_state != XenbusStateInitialised)
1097                                 break;
1098
1099                         if (scsi_add_device(info->host, chn, tgt, lun)) {
1100                                 dev_err(&dev->dev, "scsi_add_device\n");
1101                                 err = xenbus_printf(XBT_NIL, dev->nodename,
1102                                               info->dev_state_path,
1103                                               "%d", XenbusStateClosed);
1104                                 if (err)
1105                                         xenbus_dev_error(dev, err,
1106                                                 "%s: writing dev_state_path", __func__);
1107                         }
1108                         break;
1109                 case VSCSIFRONT_OP_DEL_LUN:
1110                         if (device_state != XenbusStateClosing)
1111                                 break;
1112
1113                         sdev = scsi_device_lookup(info->host, chn, tgt, lun);
1114                         if (sdev) {
1115                                 scsi_remove_device(sdev);
1116                                 scsi_device_put(sdev);
1117                         }
1118                         break;
1119                 case VSCSIFRONT_OP_READD_LUN:
1120                         if (device_state == XenbusStateConnected) {
1121                                 err = xenbus_printf(XBT_NIL, dev->nodename,
1122                                               info->dev_state_path,
1123                                               "%d", XenbusStateConnected);
1124                                 if (err)
1125                                         xenbus_dev_error(dev, err,
1126                                                 "%s: writing dev_state_path", __func__);
1127                         }
1128                         break;
1129                 default:
1130                         break;
1131                 }
1132         }
1133
1134         info->curr = NULL;
1135
1136         kfree(dir);
1137 }
1138
1139 static void scsifront_read_backend_params(struct xenbus_device *dev,
1140                                           struct vscsifrnt_info *info)
1141 {
1142         unsigned int sg_grant, nr_segs;
1143         struct Scsi_Host *host = info->host;
1144
1145         sg_grant = xenbus_read_unsigned(dev->otherend, "feature-sg-grant", 0);
1146         nr_segs = min_t(unsigned int, sg_grant, SG_ALL);
1147         nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE);
1148         nr_segs = min_t(unsigned int, nr_segs,
1149                         VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
1150                         sizeof(struct scsiif_request_segment));
1151
1152         if (!info->pause && sg_grant)
1153                 dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs);
1154         else if (info->pause && nr_segs < host->sg_tablesize)
1155                 dev_warn(&dev->dev,
1156                          "SG entries decreased from %d to %u - device may not work properly anymore\n",
1157                          host->sg_tablesize, nr_segs);
1158
1159         host->sg_tablesize = nr_segs;
1160         host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512;
1161 }
1162
1163 static void scsifront_backend_changed(struct xenbus_device *dev,
1164                                       enum xenbus_state backend_state)
1165 {
1166         struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
1167
1168         pr_debug("%s: %p %u %u\n", __func__, dev, dev->state, backend_state);
1169
1170         switch (backend_state) {
1171         case XenbusStateUnknown:
1172         case XenbusStateInitialising:
1173         case XenbusStateInitWait:
1174         case XenbusStateInitialised:
1175                 break;
1176
1177         case XenbusStateConnected:
1178                 scsifront_read_backend_params(dev, info);
1179
1180                 if (info->pause) {
1181                         scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN);
1182                         xenbus_switch_state(dev, XenbusStateConnected);
1183                         info->pause = 0;
1184                         return;
1185                 }
1186
1187                 if (xenbus_read_driver_state(dev->nodename) ==
1188                     XenbusStateInitialised)
1189                         scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
1190
1191                 if (dev->state != XenbusStateConnected)
1192                         xenbus_switch_state(dev, XenbusStateConnected);
1193                 break;
1194
1195         case XenbusStateClosed:
1196                 if (dev->state == XenbusStateClosed)
1197                         break;
1198                 fallthrough;    /* Missed the backend's Closing state */
1199         case XenbusStateClosing:
1200                 scsifront_disconnect(info);
1201                 break;
1202
1203         case XenbusStateReconfiguring:
1204                 scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
1205                 xenbus_switch_state(dev, XenbusStateReconfiguring);
1206                 break;
1207
1208         case XenbusStateReconfigured:
1209                 scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
1210                 xenbus_switch_state(dev, XenbusStateConnected);
1211                 break;
1212         }
1213 }
1214
/* xenbus device types handled by this driver; an empty entry ends the table. */
static const struct xenbus_device_id scsifront_ids[] = {
        { "vscsi" },
        { "" }
};
1219
/*
 * xenbus driver description: the id table plus the probe/remove,
 * suspend/resume and backend state change callbacks of this frontend.
 */
static struct xenbus_driver scsifront_driver = {
        .ids                    = scsifront_ids,
        .probe                  = scsifront_probe,
        .remove                 = scsifront_remove,
        .resume                 = scsifront_resume,
        .suspend                = scsifront_suspend,
        .otherend_changed       = scsifront_backend_changed,
};
1228
1229 static int __init scsifront_init(void)
1230 {
1231         if (!xen_domain())
1232                 return -ENODEV;
1233
1234         return xenbus_register_frontend(&scsifront_driver);
1235 }
1236 module_init(scsifront_init);
1237
/* Module exit point: unregister the frontend driver from xenbus. */
static void __exit scsifront_exit(void)
{
        xenbus_unregister_driver(&scsifront_driver);
}
module_exit(scsifront_exit);
1243
/* Module metadata: description, license, module alias and author. */
MODULE_DESCRIPTION("Xen SCSI frontend driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS("xen:vscsi");
MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");