GNU Linux-libre 5.19.9-gnu
drivers/virtio/virtio_ring.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Virtio ring implementation.
3  *
4  *  Copyright 2007 Rusty Russell IBM Corporation
5  */
6 #include <linux/virtio.h>
7 #include <linux/virtio_ring.h>
8 #include <linux/virtio_config.h>
9 #include <linux/device.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/hrtimer.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/spinlock.h>
15 #include <xen/xen.h>
16
17 #ifdef DEBUG
18 /* For development, we want to crash whenever the ring is screwed. */
19 #define BAD_RING(_vq, fmt, args...)                             \
20         do {                                                    \
21                 dev_err(&(_vq)->vq.vdev->dev,                   \
22                         "%s:"fmt, (_vq)->vq.name, ##args);      \
23                 BUG();                                          \
24         } while (0)
25 /* Caller is supposed to guarantee no reentry. */
26 #define START_USE(_vq)                                          \
27         do {                                                    \
28                 if ((_vq)->in_use)                              \
29                         panic("%s:in_use = %i\n",               \
30                               (_vq)->vq.name, (_vq)->in_use);   \
31                 (_vq)->in_use = __LINE__;                       \
32         } while (0)
33 #define END_USE(_vq) \
34         do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
35 #define LAST_ADD_TIME_UPDATE(_vq)                               \
36         do {                                                    \
37                 ktime_t now = ktime_get();                      \
38                                                                 \
39                 /* No kick or get, with 0.1 seconds between?  Warn. */ \
40                 if ((_vq)->last_add_time_valid)                 \
41                         WARN_ON(ktime_to_ms(ktime_sub(now,      \
42                                 (_vq)->last_add_time)) > 100);  \
43                 (_vq)->last_add_time = now;                     \
44                 (_vq)->last_add_time_valid = true;              \
45         } while (0)
46 #define LAST_ADD_TIME_CHECK(_vq)                                \
47         do {                                                    \
48                 if ((_vq)->last_add_time_valid) {               \
49                         WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
50                                       (_vq)->last_add_time)) > 100); \
51                 }                                               \
52         } while (0)
53 #define LAST_ADD_TIME_INVALID(_vq)                              \
54         ((_vq)->last_add_time_valid = false)
55 #else
56 #define BAD_RING(_vq, fmt, args...)                             \
57         do {                                                    \
58                 dev_err(&_vq->vq.vdev->dev,                     \
59                         "%s:"fmt, (_vq)->vq.name, ##args);      \
60                 (_vq)->broken = true;                           \
61         } while (0)
62 #define START_USE(vq)
63 #define END_USE(vq)
64 #define LAST_ADD_TIME_UPDATE(vq)
65 #define LAST_ADD_TIME_CHECK(vq)
66 #define LAST_ADD_TIME_INVALID(vq)
67 #endif
68
69 struct vring_desc_state_split {
70         void *data;                     /* Data for callback. */
71         struct vring_desc *indir_desc;  /* Indirect descriptor, if any. */
72 };
73
74 struct vring_desc_state_packed {
75         void *data;                     /* Data for callback. */
76         struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
77         u16 num;                        /* Descriptor list length. */
78         u16 last;                       /* The last desc state in a list. */
79 };
80
81 struct vring_desc_extra {
82         dma_addr_t addr;                /* Descriptor DMA addr. */
83         u32 len;                        /* Descriptor length. */
84         u16 flags;                      /* Descriptor flags. */
85         u16 next;                       /* The next desc state in a list. */
86 };
87
88 struct vring_virtqueue {
89         struct virtqueue vq;
90
91         /* Is this a packed ring? */
92         bool packed_ring;
93
94         /* Is DMA API used? */
95         bool use_dma_api;
96
97         /* Can we use weak barriers? */
98         bool weak_barriers;
99
100         /* Other side has made a mess, don't try any more. */
101         bool broken;
102
103         /* Host supports indirect buffers */
104         bool indirect;
105
106         /* Host publishes avail event idx */
107         bool event;
108
109         /* Head of free buffer list. */
110         unsigned int free_head;
111         /* Number we've added since last sync. */
112         unsigned int num_added;
113
114         /* Last used index we've seen.
115          * For the split ring, it just contains the last used index.
116          * For the packed ring, bits up to VRING_PACKED_EVENT_F_WRAP_CTR
117          * contain the last used index, and bits from
118          * VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
119          */
120         u16 last_used_idx;
121
122         /* Hint for event idx: already triggered no need to disable. */
123         bool event_triggered;
124
125         union {
126                 /* Available for split ring */
127                 struct {
128                         /* Actual memory layout for this queue. */
129                         struct vring vring;
130
131                         /* Last written value to avail->flags */
132                         u16 avail_flags_shadow;
133
134                         /*
135                          * Last written value to avail->idx in
136                          * guest byte order.
137                          */
138                         u16 avail_idx_shadow;
139
140                         /* Per-descriptor state. */
141                         struct vring_desc_state_split *desc_state;
142                         struct vring_desc_extra *desc_extra;
143
144                         /* DMA address and size information */
145                         dma_addr_t queue_dma_addr;
146                         size_t queue_size_in_bytes;
147                 } split;
148
149                 /* Available for packed ring */
150                 struct {
151                         /* Actual memory layout for this queue. */
152                         struct {
153                                 unsigned int num;
154                                 struct vring_packed_desc *desc;
155                                 struct vring_packed_desc_event *driver;
156                                 struct vring_packed_desc_event *device;
157                         } vring;
158
159                         /* Driver ring wrap counter. */
160                         bool avail_wrap_counter;
161
162                         /* Avail used flags. */
163                         u16 avail_used_flags;
164
165                         /* Index of the next avail descriptor. */
166                         u16 next_avail_idx;
167
168                         /*
169                          * Last written value to driver->flags in
170                          * guest byte order.
171                          */
172                         u16 event_flags_shadow;
173
174                         /* Per-descriptor state. */
175                         struct vring_desc_state_packed *desc_state;
176                         struct vring_desc_extra *desc_extra;
177
178                         /* DMA address and size information */
179                         dma_addr_t ring_dma_addr;
180                         dma_addr_t driver_event_dma_addr;
181                         dma_addr_t device_event_dma_addr;
182                         size_t ring_size_in_bytes;
183                         size_t event_size_in_bytes;
184                 } packed;
185         };
186
187         /* How to notify other side. FIXME: commonalize hcalls! */
188         bool (*notify)(struct virtqueue *vq);
189
190         /* DMA, allocation, and size information */
191         bool we_own_ring;
192
193 #ifdef DEBUG
194         /* They're supposed to lock for us. */
195         unsigned int in_use;
196
197         /* Figure out if their kicks are too delayed. */
198         bool last_add_time_valid;
199         ktime_t last_add_time;
200 #endif
201 };
202
203
204 /*
205  * Helpers.
206  */
207
208 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
209
210 static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
211                                           unsigned int total_sg)
212 {
213         /*
214          * If the host supports indirect descriptor tables, and we have multiple
215          * buffers, then go indirect. FIXME: tune this threshold
216          */
217         return (vq->indirect && total_sg > 1 && vq->vq.num_free);
218 }
219
220 /*
221  * Modern virtio devices have feature bits to specify whether they need a
222  * quirk and bypass the IOMMU. If not there, just use the DMA API.
223  *
224  * If there, the interaction between virtio and DMA API is messy.
225  *
226  * On most systems with virtio, physical addresses match bus addresses,
227  * and it doesn't particularly matter whether we use the DMA API.
228  *
229  * On some systems, including Xen and any system with a physical device
230  * that speaks virtio behind a physical IOMMU, we must use the DMA API
231  * for virtio DMA to work at all.
232  *
233  * On other systems, including SPARC and PPC64, virtio-pci devices are
234  * enumerated as though they are behind an IOMMU, but the virtio host
235  * ignores the IOMMU, so we must either pretend that the IOMMU isn't
236  * there or somehow map everything as the identity.
237  *
238  * For the time being, we preserve historic behavior and bypass the DMA
239  * API.
240  *
241  * TODO: install a per-device DMA ops structure that does the right thing
242  * taking into account all the above quirks, and use the DMA API
243  * unconditionally on data path.
244  */
245
246 static bool vring_use_dma_api(struct virtio_device *vdev)
247 {
248         if (!virtio_has_dma_quirk(vdev))
249                 return true;
250
251         /* Otherwise, we are left to guess. */
252         /*
253          * In theory, it's possible to have a buggy QEMU-supplied
254          * emulated Q35 IOMMU and Xen enabled at the same time.  On
255          * such a configuration, virtio has never worked and will
256          * not work without an even larger kludge.  Instead, enable
257          * the DMA API if we're a Xen guest, which at least allows
258          * all of the sensible Xen configurations to work correctly.
259          */
260         if (xen_domain())
261                 return true;
262
263         return false;
264 }
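
/*
 * A brief note on the feature check above (assuming the virtio_has_dma_quirk()
 * helper in include/linux/virtio_config.h): the quirk is reported when the
 * device has NOT negotiated VIRTIO_F_ACCESS_PLATFORM, i.e. the legacy
 * "bypass the IOMMU" behaviour applies; once the feature is negotiated, the
 * early return above means the DMA API is always used.
 */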
265
266 size_t virtio_max_dma_size(struct virtio_device *vdev)
267 {
268         size_t max_segment_size = SIZE_MAX;
269
270         if (vring_use_dma_api(vdev))
271                 max_segment_size = dma_max_mapping_size(vdev->dev.parent);
272
273         return max_segment_size;
274 }
275 EXPORT_SYMBOL_GPL(virtio_max_dma_size);
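
/*
 * A minimal usage sketch (hypothetical driver code, built only on the exported
 * helper above): a driver can cap the size of buffers it queues with
 *
 *	size_t max_seg = min_t(size_t, MY_PREFERRED_SEG_SIZE,
 *			       virtio_max_dma_size(vdev));
 *
 * where MY_PREFERRED_SEG_SIZE is a hypothetical driver-specific limit.
 */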
276
277 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
278                               dma_addr_t *dma_handle, gfp_t flag)
279 {
280         if (vring_use_dma_api(vdev)) {
281                 return dma_alloc_coherent(vdev->dev.parent, size,
282                                           dma_handle, flag);
283         } else {
284                 void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
285
286                 if (queue) {
287                         phys_addr_t phys_addr = virt_to_phys(queue);
288                         *dma_handle = (dma_addr_t)phys_addr;
289
290                         /*
291                          * Sanity check: make sure we didn't truncate
292                          * the address.  The only arches I can find that
293                          * have 64-bit phys_addr_t but 32-bit dma_addr_t
294                          * are certain non-highmem MIPS and x86
295                          * configurations, but these configurations
296                          * should never allocate physical pages above 32
297                          * bits, so this is fine.  Just in case, throw a
298                          * warning and abort if we end up with an
299                          * unrepresentable address.
300                          */
301                         if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
302                                 free_pages_exact(queue, PAGE_ALIGN(size));
303                                 return NULL;
304                         }
305                 }
306                 return queue;
307         }
308 }
309
310 static void vring_free_queue(struct virtio_device *vdev, size_t size,
311                              void *queue, dma_addr_t dma_handle)
312 {
313         if (vring_use_dma_api(vdev))
314                 dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
315         else
316                 free_pages_exact(queue, PAGE_ALIGN(size));
317 }
318
319 /*
320  * The DMA ops on various arches are rather gnarly right now, and
321  * making all of the arch DMA ops work on the vring device itself
322  * is a mess.  For now, we use the parent device for DMA ops.
323  */
324 static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
325 {
326         return vq->vq.vdev->dev.parent;
327 }
328
329 /* Map one sg entry. */
330 static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
331                                    struct scatterlist *sg,
332                                    enum dma_data_direction direction)
333 {
334         if (!vq->use_dma_api)
335                 return (dma_addr_t)sg_phys(sg);
336
337         /*
338          * We can't use dma_map_sg, because we don't use scatterlists in
339          * the way it expects (we don't guarantee that the scatterlist
340          * will exist for the lifetime of the mapping).
341          */
342         return dma_map_page(vring_dma_dev(vq),
343                             sg_page(sg), sg->offset, sg->length,
344                             direction);
345 }
346
347 static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
348                                    void *cpu_addr, size_t size,
349                                    enum dma_data_direction direction)
350 {
351         if (!vq->use_dma_api)
352                 return (dma_addr_t)virt_to_phys(cpu_addr);
353
354         return dma_map_single(vring_dma_dev(vq),
355                               cpu_addr, size, direction);
356 }
357
358 static int vring_mapping_error(const struct vring_virtqueue *vq,
359                                dma_addr_t addr)
360 {
361         if (!vq->use_dma_api)
362                 return 0;
363
364         return dma_mapping_error(vring_dma_dev(vq), addr);
365 }
366
367
368 /*
369  * Split ring specific functions - *_split().
370  */
371
372 static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
373                                            struct vring_desc *desc)
374 {
375         u16 flags;
376
377         if (!vq->use_dma_api)
378                 return;
379
380         flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
381
382         dma_unmap_page(vring_dma_dev(vq),
383                        virtio64_to_cpu(vq->vq.vdev, desc->addr),
384                        virtio32_to_cpu(vq->vq.vdev, desc->len),
385                        (flags & VRING_DESC_F_WRITE) ?
386                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
387 }
388
389 static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
390                                           unsigned int i)
391 {
392         struct vring_desc_extra *extra = vq->split.desc_extra;
393         u16 flags;
394
395         if (!vq->use_dma_api)
396                 goto out;
397
398         flags = extra[i].flags;
399
400         if (flags & VRING_DESC_F_INDIRECT) {
401                 dma_unmap_single(vring_dma_dev(vq),
402                                  extra[i].addr,
403                                  extra[i].len,
404                                  (flags & VRING_DESC_F_WRITE) ?
405                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
406         } else {
407                 dma_unmap_page(vring_dma_dev(vq),
408                                extra[i].addr,
409                                extra[i].len,
410                                (flags & VRING_DESC_F_WRITE) ?
411                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
412         }
413
414 out:
415         return extra[i].next;
416 }
417
418 static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
419                                                unsigned int total_sg,
420                                                gfp_t gfp)
421 {
422         struct vring_desc *desc;
423         unsigned int i;
424
425         /*
426          * We require lowmem mappings for the descriptors because
427          * otherwise virt_to_phys will give us bogus addresses in the
428          * virtqueue.
429          */
430         gfp &= ~__GFP_HIGHMEM;
431
432         desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
433         if (!desc)
434                 return NULL;
435
436         for (i = 0; i < total_sg; i++)
437                 desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
438         return desc;
439 }
440
441 static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
442                                                     struct vring_desc *desc,
443                                                     unsigned int i,
444                                                     dma_addr_t addr,
445                                                     unsigned int len,
446                                                     u16 flags,
447                                                     bool indirect)
448 {
449         struct vring_virtqueue *vring = to_vvq(vq);
450         struct vring_desc_extra *extra = vring->split.desc_extra;
451         u16 next;
452
453         desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
454         desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
455         desc[i].len = cpu_to_virtio32(vq->vdev, len);
456
457         if (!indirect) {
458                 next = extra[i].next;
459                 desc[i].next = cpu_to_virtio16(vq->vdev, next);
460
461                 extra[i].addr = addr;
462                 extra[i].len = len;
463                 extra[i].flags = flags;
464         } else
465                 next = virtio16_to_cpu(vq->vdev, desc[i].next);
466
467         return next;
468 }
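
/*
 * Descriptive note: the helper above fills in a single descriptor in
 * guest-endian ("virtio native") byte order and returns the index of the next
 * free slot.  For ring descriptors the chaining information is mirrored in
 * desc_extra[], which is where the return value comes from; for indirect
 * tables the next index is read back from the descriptor itself.
 */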
469
470 static inline int virtqueue_add_split(struct virtqueue *_vq,
471                                       struct scatterlist *sgs[],
472                                       unsigned int total_sg,
473                                       unsigned int out_sgs,
474                                       unsigned int in_sgs,
475                                       void *data,
476                                       void *ctx,
477                                       gfp_t gfp)
478 {
479         struct vring_virtqueue *vq = to_vvq(_vq);
480         struct scatterlist *sg;
481         struct vring_desc *desc;
482         unsigned int i, n, avail, descs_used, prev, err_idx;
483         int head;
484         bool indirect;
485
486         START_USE(vq);
487
488         BUG_ON(data == NULL);
489         BUG_ON(ctx && vq->indirect);
490
491         if (unlikely(vq->broken)) {
492                 END_USE(vq);
493                 return -EIO;
494         }
495
496         LAST_ADD_TIME_UPDATE(vq);
497
498         BUG_ON(total_sg == 0);
499
500         head = vq->free_head;
501
502         if (virtqueue_use_indirect(vq, total_sg))
503                 desc = alloc_indirect_split(_vq, total_sg, gfp);
504         else {
505                 desc = NULL;
506                 WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
507         }
508
509         if (desc) {
510                 /* Use a single buffer which doesn't continue */
511                 indirect = true;
512                 /* Set up rest to use this indirect table. */
513                 i = 0;
514                 descs_used = 1;
515         } else {
516                 indirect = false;
517                 desc = vq->split.vring.desc;
518                 i = head;
519                 descs_used = total_sg;
520         }
521
522         if (unlikely(vq->vq.num_free < descs_used)) {
523                 pr_debug("Can't add buf len %i - avail = %i\n",
524                          descs_used, vq->vq.num_free);
525                 /* FIXME: for historical reasons, we force a notify here if
526                  * there are outgoing parts to the buffer.  Presumably the
527                  * host should service the ring ASAP. */
528                 if (out_sgs)
529                         vq->notify(&vq->vq);
530                 if (indirect)
531                         kfree(desc);
532                 END_USE(vq);
533                 return -ENOSPC;
534         }
535
536         for (n = 0; n < out_sgs; n++) {
537                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
538                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
539                         if (vring_mapping_error(vq, addr))
540                                 goto unmap_release;
541
542                         prev = i;
543                         /* Note that we trust the indirect descriptor
544                          * table since it uses streaming DMA mappings.
545                          */
546                         i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
547                                                      VRING_DESC_F_NEXT,
548                                                      indirect);
549                 }
550         }
551         for (; n < (out_sgs + in_sgs); n++) {
552                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
553                         dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
554                         if (vring_mapping_error(vq, addr))
555                                 goto unmap_release;
556
557                         prev = i;
558                         /* Note that we trust the indirect descriptor
559                          * table since it uses streaming DMA mappings.
560                          */
561                         i = virtqueue_add_desc_split(_vq, desc, i, addr,
562                                                      sg->length,
563                                                      VRING_DESC_F_NEXT |
564                                                      VRING_DESC_F_WRITE,
565                                                      indirect);
566                 }
567         }
568         /* Last one doesn't continue. */
569         desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
570         if (!indirect && vq->use_dma_api)
571                 vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
572                         ~VRING_DESC_F_NEXT;
573
574         if (indirect) {
575                 /* Now that the indirect table is filled in, map it. */
576                 dma_addr_t addr = vring_map_single(
577                         vq, desc, total_sg * sizeof(struct vring_desc),
578                         DMA_TO_DEVICE);
579                 if (vring_mapping_error(vq, addr))
580                         goto unmap_release;
581
582                 virtqueue_add_desc_split(_vq, vq->split.vring.desc,
583                                          head, addr,
584                                          total_sg * sizeof(struct vring_desc),
585                                          VRING_DESC_F_INDIRECT,
586                                          false);
587         }
588
589         /* We're using some buffers from the free list. */
590         vq->vq.num_free -= descs_used;
591
592         /* Update free pointer */
593         if (indirect)
594                 vq->free_head = vq->split.desc_extra[head].next;
595         else
596                 vq->free_head = i;
597
598         /* Store token and indirect buffer state. */
599         vq->split.desc_state[head].data = data;
600         if (indirect)
601                 vq->split.desc_state[head].indir_desc = desc;
602         else
603                 vq->split.desc_state[head].indir_desc = ctx;
604
605         /* Put entry in available array (but don't update avail->idx until they
606          * do sync). */
607         avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
608         vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
609
610         /* Descriptors and available array need to be set before we expose the
611          * new available array entries. */
612         virtio_wmb(vq->weak_barriers);
613         vq->split.avail_idx_shadow++;
614         vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
615                                                 vq->split.avail_idx_shadow);
616         vq->num_added++;
617
618         pr_debug("Added buffer head %i to %p\n", head, vq);
619         END_USE(vq);
620
621         /* This is very unlikely, but theoretically possible.  Kick
622          * just in case. */
623         if (unlikely(vq->num_added == (1 << 16) - 1))
624                 virtqueue_kick(_vq);
625
626         return 0;
627
628 unmap_release:
629         err_idx = i;
630
631         if (indirect)
632                 i = 0;
633         else
634                 i = head;
635
636         for (n = 0; n < total_sg; n++) {
637                 if (i == err_idx)
638                         break;
639                 if (indirect) {
640                         vring_unmap_one_split_indirect(vq, &desc[i]);
641                         i = virtio16_to_cpu(_vq->vdev, desc[i].next);
642                 } else
643                         i = vring_unmap_one_split(vq, i);
644         }
645
646         if (indirect)
647                 kfree(desc);
648
649         END_USE(vq);
650         return -ENOMEM;
651 }
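
/*
 * A minimal caller sketch (assuming the public virtqueue_add_sgs() wrapper in
 * include/linux/virtio.h, which funnels into virtqueue_add_split() on split
 * rings):
 *
 *	struct scatterlist hdr, payload, *sgs[] = { &hdr, &payload };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&payload, req->buf, req->len);
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *
 * "req" and its fields are hypothetical driver-owned objects; the token passed
 * as "data" is what virtqueue_get_buf() later hands back.
 */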
652
653 static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
654 {
655         struct vring_virtqueue *vq = to_vvq(_vq);
656         u16 new, old;
657         bool needs_kick;
658
659         START_USE(vq);
660         /* We need to expose available array entries before checking avail
661          * event. */
662         virtio_mb(vq->weak_barriers);
663
664         old = vq->split.avail_idx_shadow - vq->num_added;
665         new = vq->split.avail_idx_shadow;
666         vq->num_added = 0;
667
668         LAST_ADD_TIME_CHECK(vq);
669         LAST_ADD_TIME_INVALID(vq);
670
671         if (vq->event) {
672                 needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
673                                         vring_avail_event(&vq->split.vring)),
674                                               new, old);
675         } else {
676                 needs_kick = !(vq->split.vring.used->flags &
677                                         cpu_to_virtio16(_vq->vdev,
678                                                 VRING_USED_F_NO_NOTIFY));
679         }
680         END_USE(vq);
681         return needs_kick;
682 }
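
/*
 * For reference (using the vring_need_event() helper from
 * include/uapi/linux/virtio_ring.h): the event-idx test above reduces to
 *
 *	(u16)(new - event_idx - 1) < (u16)(new - old)
 *
 * i.e. kick only if the device's advertised avail event index falls within the
 * batch of entries added since the last kick.
 */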
683
684 static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
685                              void **ctx)
686 {
687         unsigned int i, j;
688         __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
689
690         /* Clear data ptr. */
691         vq->split.desc_state[head].data = NULL;
692
693         /* Put back on free list: unmap first-level descriptors and find end */
694         i = head;
695
696         while (vq->split.vring.desc[i].flags & nextflag) {
697                 vring_unmap_one_split(vq, i);
698                 i = vq->split.desc_extra[i].next;
699                 vq->vq.num_free++;
700         }
701
702         vring_unmap_one_split(vq, i);
703         vq->split.desc_extra[i].next = vq->free_head;
704         vq->free_head = head;
705
706         /* Plus final descriptor */
707         vq->vq.num_free++;
708
709         if (vq->indirect) {
710                 struct vring_desc *indir_desc =
711                                 vq->split.desc_state[head].indir_desc;
712                 u32 len;
713
714                 /* Free the indirect table, if any, now that it's unmapped. */
715                 if (!indir_desc)
716                         return;
717
718                 len = vq->split.desc_extra[head].len;
719
720                 BUG_ON(!(vq->split.desc_extra[head].flags &
721                                 VRING_DESC_F_INDIRECT));
722                 BUG_ON(len == 0 || len % sizeof(struct vring_desc));
723
724                 for (j = 0; j < len / sizeof(struct vring_desc); j++)
725                         vring_unmap_one_split_indirect(vq, &indir_desc[j]);
726
727                 kfree(indir_desc);
728                 vq->split.desc_state[head].indir_desc = NULL;
729         } else if (ctx) {
730                 *ctx = vq->split.desc_state[head].indir_desc;
731         }
732 }
733
734 static inline bool more_used_split(const struct vring_virtqueue *vq)
735 {
736         return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
737                         vq->split.vring.used->idx);
738 }
739
740 static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
741                                          unsigned int *len,
742                                          void **ctx)
743 {
744         struct vring_virtqueue *vq = to_vvq(_vq);
745         void *ret;
746         unsigned int i;
747         u16 last_used;
748
749         START_USE(vq);
750
751         if (unlikely(vq->broken)) {
752                 END_USE(vq);
753                 return NULL;
754         }
755
756         if (!more_used_split(vq)) {
757                 pr_debug("No more buffers in queue\n");
758                 END_USE(vq);
759                 return NULL;
760         }
761
762         /* Only get used array entries after they have been exposed by host. */
763         virtio_rmb(vq->weak_barriers);
764
765         last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
766         i = virtio32_to_cpu(_vq->vdev,
767                         vq->split.vring.used->ring[last_used].id);
768         *len = virtio32_to_cpu(_vq->vdev,
769                         vq->split.vring.used->ring[last_used].len);
770
771         if (unlikely(i >= vq->split.vring.num)) {
772                 BAD_RING(vq, "id %u out of range\n", i);
773                 return NULL;
774         }
775         if (unlikely(!vq->split.desc_state[i].data)) {
776                 BAD_RING(vq, "id %u is not a head!\n", i);
777                 return NULL;
778         }
779
780         /* detach_buf_split clears data, so grab it now. */
781         ret = vq->split.desc_state[i].data;
782         detach_buf_split(vq, i, ctx);
783         vq->last_used_idx++;
784         /* If we expect an interrupt for the next entry, tell host
785          * by writing event index and flush out the write before
786          * the read in the next get_buf call. */
787         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
788                 virtio_store_mb(vq->weak_barriers,
789                                 &vring_used_event(&vq->split.vring),
790                                 cpu_to_virtio16(_vq->vdev, vq->last_used_idx));
791
792         LAST_ADD_TIME_INVALID(vq);
793
794         END_USE(vq);
795         return ret;
796 }
797
798 static void virtqueue_disable_cb_split(struct virtqueue *_vq)
799 {
800         struct vring_virtqueue *vq = to_vvq(_vq);
801
802         if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
803                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
804                 if (vq->event)
805                         /* TODO: this is a hack. Figure out a cleaner value to write. */
806                         vring_used_event(&vq->split.vring) = 0x0;
807                 else
808                         vq->split.vring.avail->flags =
809                                 cpu_to_virtio16(_vq->vdev,
810                                                 vq->split.avail_flags_shadow);
811         }
812 }
813
814 static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
815 {
816         struct vring_virtqueue *vq = to_vvq(_vq);
817         u16 last_used_idx;
818
819         START_USE(vq);
820
821         /* We optimistically turn back on interrupts, then check if there was
822          * more to do. */
823         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
824          * either clear the flags bit or point the event index at the next
825          * entry. Always do both to keep code simple. */
826         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
827                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
828                 if (!vq->event)
829                         vq->split.vring.avail->flags =
830                                 cpu_to_virtio16(_vq->vdev,
831                                                 vq->split.avail_flags_shadow);
832         }
833         vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
834                         last_used_idx = vq->last_used_idx);
835         END_USE(vq);
836         return last_used_idx;
837 }
838
839 static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
840 {
841         struct vring_virtqueue *vq = to_vvq(_vq);
842
843         return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
844                         vq->split.vring.used->idx);
845 }
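
/*
 * A minimal sketch of the intended prepare/poll pattern (via the public
 * virtqueue_enable_cb_prepare()/virtqueue_poll() wrappers):
 *
 *	unsigned int opaque = virtqueue_enable_cb_prepare(vq);
 *	if (unlikely(virtqueue_poll(vq, opaque))) {
 *		virtqueue_disable_cb(vq);
 *		(process the newly used buffers again)
 *	}
 */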
846
847 static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
848 {
849         struct vring_virtqueue *vq = to_vvq(_vq);
850         u16 bufs;
851
852         START_USE(vq);
853
854         /* We optimistically turn back on interrupts, then check if there was
855          * more to do. */
856         /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
857          * either clear the flags bit or point the event index at the next
858          * entry. Always update the event index to keep code simple. */
859         if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
860                 vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
861                 if (!vq->event)
862                         vq->split.vring.avail->flags =
863                                 cpu_to_virtio16(_vq->vdev,
864                                                 vq->split.avail_flags_shadow);
865         }
866         /* TODO: tune this threshold */
867         bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;
868
869         virtio_store_mb(vq->weak_barriers,
870                         &vring_used_event(&vq->split.vring),
871                         cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));
872
873         if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
874                                         - vq->last_used_idx) > bufs)) {
875                 END_USE(vq);
876                 return false;
877         }
878
879         END_USE(vq);
880         return true;
881 }
882
883 static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
884 {
885         struct vring_virtqueue *vq = to_vvq(_vq);
886         unsigned int i;
887         void *buf;
888
889         START_USE(vq);
890
891         for (i = 0; i < vq->split.vring.num; i++) {
892                 if (!vq->split.desc_state[i].data)
893                         continue;
894                 /* detach_buf_split clears data, so grab it now. */
895                 buf = vq->split.desc_state[i].data;
896                 detach_buf_split(vq, i, NULL);
897                 vq->split.avail_idx_shadow--;
898                 vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
899                                 vq->split.avail_idx_shadow);
900                 END_USE(vq);
901                 return buf;
902         }
903         /* That should have freed everything. */
904         BUG_ON(vq->vq.num_free != vq->split.vring.num);
905
906         END_USE(vq);
907         return NULL;
908 }
909
910 static struct virtqueue *vring_create_virtqueue_split(
911         unsigned int index,
912         unsigned int num,
913         unsigned int vring_align,
914         struct virtio_device *vdev,
915         bool weak_barriers,
916         bool may_reduce_num,
917         bool context,
918         bool (*notify)(struct virtqueue *),
919         void (*callback)(struct virtqueue *),
920         const char *name)
921 {
922         struct virtqueue *vq;
923         void *queue = NULL;
924         dma_addr_t dma_addr;
925         size_t queue_size_in_bytes;
926         struct vring vring;
927
928         /* We assume num is a power of 2. */
929         if (num & (num - 1)) {
930                 dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
931                 return NULL;
932         }
933
934         /* TODO: allocate each queue chunk individually */
935         for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
936                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
937                                           &dma_addr,
938                                           GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
939                 if (queue)
940                         break;
941                 if (!may_reduce_num)
942                         return NULL;
943         }
944
945         if (!num)
946                 return NULL;
947
948         if (!queue) {
949                 /* Try to get a single page. You are my only hope! */
950                 queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
951                                           &dma_addr, GFP_KERNEL|__GFP_ZERO);
952         }
953         if (!queue)
954                 return NULL;
955
956         queue_size_in_bytes = vring_size(num, vring_align);
957         vring_init(&vring, num, queue, vring_align);
958
959         vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
960                                    notify, callback, name);
961         if (!vq) {
962                 vring_free_queue(vdev, queue_size_in_bytes, queue,
963                                  dma_addr);
964                 return NULL;
965         }
966
967         to_vvq(vq)->split.queue_dma_addr = dma_addr;
968         to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
969         to_vvq(vq)->we_own_ring = true;
970
971         return vq;
972 }
973
974
975 /*
976  * Packed ring specific functions - *_packed().
977  */
978 static inline bool packed_used_wrap_counter(u16 last_used_idx)
979 {
980         return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
981 }
982
983 static inline u16 packed_last_used(u16 last_used_idx)
984 {
985         return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
986 }
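
/*
 * A worked example (assuming VRING_PACKED_EVENT_F_WRAP_CTR == 15, as defined
 * in include/uapi/linux/virtio_ring.h): last_used_idx packs the used wrap
 * counter into bit 15 and the ring index into bits 0-14, so
 *
 *	0x8005 -> index 5, wrap counter 1
 *	0x0005 -> index 5, wrap counter 0
 */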
987
988 static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
989                                      struct vring_desc_extra *extra)
990 {
991         u16 flags;
992
993         if (!vq->use_dma_api)
994                 return;
995
996         flags = extra->flags;
997
998         if (flags & VRING_DESC_F_INDIRECT) {
999                 dma_unmap_single(vring_dma_dev(vq),
1000                                  extra->addr, extra->len,
1001                                  (flags & VRING_DESC_F_WRITE) ?
1002                                  DMA_FROM_DEVICE : DMA_TO_DEVICE);
1003         } else {
1004                 dma_unmap_page(vring_dma_dev(vq),
1005                                extra->addr, extra->len,
1006                                (flags & VRING_DESC_F_WRITE) ?
1007                                DMA_FROM_DEVICE : DMA_TO_DEVICE);
1008         }
1009 }
1010
1011 static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
1012                                    struct vring_packed_desc *desc)
1013 {
1014         u16 flags;
1015
1016         if (!vq->use_dma_api)
1017                 return;
1018
1019         flags = le16_to_cpu(desc->flags);
1020
1021         dma_unmap_page(vring_dma_dev(vq),
1022                        le64_to_cpu(desc->addr),
1023                        le32_to_cpu(desc->len),
1024                        (flags & VRING_DESC_F_WRITE) ?
1025                        DMA_FROM_DEVICE : DMA_TO_DEVICE);
1026 }
1027
1028 static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
1029                                                        gfp_t gfp)
1030 {
1031         struct vring_packed_desc *desc;
1032
1033         /*
1034          * We require lowmem mappings for the descriptors because
1035          * otherwise virt_to_phys will give us bogus addresses in the
1036          * virtqueue.
1037          */
1038         gfp &= ~__GFP_HIGHMEM;
1039
1040         desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);
1041
1042         return desc;
1043 }
1044
1045 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
1046                                          struct scatterlist *sgs[],
1047                                          unsigned int total_sg,
1048                                          unsigned int out_sgs,
1049                                          unsigned int in_sgs,
1050                                          void *data,
1051                                          gfp_t gfp)
1052 {
1053         struct vring_packed_desc *desc;
1054         struct scatterlist *sg;
1055         unsigned int i, n, err_idx;
1056         u16 head, id;
1057         dma_addr_t addr;
1058
1059         head = vq->packed.next_avail_idx;
1060         desc = alloc_indirect_packed(total_sg, gfp);
1061         if (!desc)
1062                 return -ENOMEM;
1063
1064         if (unlikely(vq->vq.num_free < 1)) {
1065                 pr_debug("Can't add buf len 1 - avail = 0\n");
1066                 kfree(desc);
1067                 END_USE(vq);
1068                 return -ENOSPC;
1069         }
1070
1071         i = 0;
1072         id = vq->free_head;
1073         BUG_ON(id == vq->packed.vring.num);
1074
1075         for (n = 0; n < out_sgs + in_sgs; n++) {
1076                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1077                         addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1078                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1079                         if (vring_mapping_error(vq, addr))
1080                                 goto unmap_release;
1081
1082                         desc[i].flags = cpu_to_le16(n < out_sgs ?
1083                                                 0 : VRING_DESC_F_WRITE);
1084                         desc[i].addr = cpu_to_le64(addr);
1085                         desc[i].len = cpu_to_le32(sg->length);
1086                         i++;
1087                 }
1088         }
1089
1090         /* Now that the indirect table is filled in, map it. */
1091         addr = vring_map_single(vq, desc,
1092                         total_sg * sizeof(struct vring_packed_desc),
1093                         DMA_TO_DEVICE);
1094         if (vring_mapping_error(vq, addr))
1095                 goto unmap_release;
1096
1097         vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
1098         vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
1099                                 sizeof(struct vring_packed_desc));
1100         vq->packed.vring.desc[head].id = cpu_to_le16(id);
1101
1102         if (vq->use_dma_api) {
1103                 vq->packed.desc_extra[id].addr = addr;
1104                 vq->packed.desc_extra[id].len = total_sg *
1105                                 sizeof(struct vring_packed_desc);
1106                 vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
1107                                                   vq->packed.avail_used_flags;
1108         }
1109
1110         /*
1111          * A driver MUST NOT make the first descriptor in the list
1112          * available before all subsequent descriptors comprising
1113          * the list are made available.
1114          */
1115         virtio_wmb(vq->weak_barriers);
1116         vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
1117                                                 vq->packed.avail_used_flags);
1118
1119         /* We're using some buffers from the free list. */
1120         vq->vq.num_free -= 1;
1121
1122         /* Update free pointer */
1123         n = head + 1;
1124         if (n >= vq->packed.vring.num) {
1125                 n = 0;
1126                 vq->packed.avail_wrap_counter ^= 1;
1127                 vq->packed.avail_used_flags ^=
1128                                 1 << VRING_PACKED_DESC_F_AVAIL |
1129                                 1 << VRING_PACKED_DESC_F_USED;
1130         }
1131         vq->packed.next_avail_idx = n;
1132         vq->free_head = vq->packed.desc_extra[id].next;
1133
1134         /* Store token and indirect buffer state. */
1135         vq->packed.desc_state[id].num = 1;
1136         vq->packed.desc_state[id].data = data;
1137         vq->packed.desc_state[id].indir_desc = desc;
1138         vq->packed.desc_state[id].last = id;
1139
1140         vq->num_added += 1;
1141
1142         pr_debug("Added buffer head %i to %p\n", head, vq);
1143         END_USE(vq);
1144
1145         return 0;
1146
1147 unmap_release:
1148         err_idx = i;
1149
1150         for (i = 0; i < err_idx; i++)
1151                 vring_unmap_desc_packed(vq, &desc[i]);
1152
1153         kfree(desc);
1154
1155         END_USE(vq);
1156         return -ENOMEM;
1157 }
1158
1159 static inline int virtqueue_add_packed(struct virtqueue *_vq,
1160                                        struct scatterlist *sgs[],
1161                                        unsigned int total_sg,
1162                                        unsigned int out_sgs,
1163                                        unsigned int in_sgs,
1164                                        void *data,
1165                                        void *ctx,
1166                                        gfp_t gfp)
1167 {
1168         struct vring_virtqueue *vq = to_vvq(_vq);
1169         struct vring_packed_desc *desc;
1170         struct scatterlist *sg;
1171         unsigned int i, n, c, descs_used, err_idx;
1172         __le16 head_flags, flags;
1173         u16 head, id, prev, curr, avail_used_flags;
1174         int err;
1175
1176         START_USE(vq);
1177
1178         BUG_ON(data == NULL);
1179         BUG_ON(ctx && vq->indirect);
1180
1181         if (unlikely(vq->broken)) {
1182                 END_USE(vq);
1183                 return -EIO;
1184         }
1185
1186         LAST_ADD_TIME_UPDATE(vq);
1187
1188         BUG_ON(total_sg == 0);
1189
1190         if (virtqueue_use_indirect(vq, total_sg)) {
1191                 err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
1192                                                     in_sgs, data, gfp);
1193                 if (err != -ENOMEM) {
1194                         END_USE(vq);
1195                         return err;
1196                 }
1197
1198                 /* fall back on direct */
1199         }
1200
1201         head = vq->packed.next_avail_idx;
1202         avail_used_flags = vq->packed.avail_used_flags;
1203
1204         WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect);
1205
1206         desc = vq->packed.vring.desc;
1207         i = head;
1208         descs_used = total_sg;
1209
1210         if (unlikely(vq->vq.num_free < descs_used)) {
1211                 pr_debug("Can't add buf len %i - avail = %i\n",
1212                          descs_used, vq->vq.num_free);
1213                 END_USE(vq);
1214                 return -ENOSPC;
1215         }
1216
1217         id = vq->free_head;
1218         BUG_ON(id == vq->packed.vring.num);
1219
1220         curr = id;
1221         c = 0;
1222         for (n = 0; n < out_sgs + in_sgs; n++) {
1223                 for (sg = sgs[n]; sg; sg = sg_next(sg)) {
1224                         dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ?
1225                                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
1226                         if (vring_mapping_error(vq, addr))
1227                                 goto unmap_release;
1228
1229                         flags = cpu_to_le16(vq->packed.avail_used_flags |
1230                                     (++c == total_sg ? 0 : VRING_DESC_F_NEXT) |
1231                                     (n < out_sgs ? 0 : VRING_DESC_F_WRITE));
1232                         if (i == head)
1233                                 head_flags = flags;
1234                         else
1235                                 desc[i].flags = flags;
1236
1237                         desc[i].addr = cpu_to_le64(addr);
1238                         desc[i].len = cpu_to_le32(sg->length);
1239                         desc[i].id = cpu_to_le16(id);
1240
1241                         if (unlikely(vq->use_dma_api)) {
1242                                 vq->packed.desc_extra[curr].addr = addr;
1243                                 vq->packed.desc_extra[curr].len = sg->length;
1244                                 vq->packed.desc_extra[curr].flags =
1245                                         le16_to_cpu(flags);
1246                         }
1247                         prev = curr;
1248                         curr = vq->packed.desc_extra[curr].next;
1249
1250                         if ((unlikely(++i >= vq->packed.vring.num))) {
1251                                 i = 0;
1252                                 vq->packed.avail_used_flags ^=
1253                                         1 << VRING_PACKED_DESC_F_AVAIL |
1254                                         1 << VRING_PACKED_DESC_F_USED;
1255                         }
1256                 }
1257         }
1258
1259         if (i < head)
1260                 vq->packed.avail_wrap_counter ^= 1;
1261
1262         /* We're using some buffers from the free list. */
1263         vq->vq.num_free -= descs_used;
1264
1265         /* Update free pointer */
1266         vq->packed.next_avail_idx = i;
1267         vq->free_head = curr;
1268
1269         /* Store token. */
1270         vq->packed.desc_state[id].num = descs_used;
1271         vq->packed.desc_state[id].data = data;
1272         vq->packed.desc_state[id].indir_desc = ctx;
1273         vq->packed.desc_state[id].last = prev;
1274
1275         /*
1276          * A driver MUST NOT make the first descriptor in the list
1277          * available before all subsequent descriptors comprising
1278          * the list are made available.
1279          */
1280         virtio_wmb(vq->weak_barriers);
1281         vq->packed.vring.desc[head].flags = head_flags;
1282         vq->num_added += descs_used;
1283
1284         pr_debug("Added buffer head %i to %p\n", head, vq);
1285         END_USE(vq);
1286
1287         return 0;
1288
1289 unmap_release:
1290         err_idx = i;
1291         i = head;
1292         curr = vq->free_head;
1293
1294         vq->packed.avail_used_flags = avail_used_flags;
1295
1296         for (n = 0; n < total_sg; n++) {
1297                 if (i == err_idx)
1298                         break;
1299                 vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]);
1300                 curr = vq->packed.desc_extra[curr].next;
1301                 i++;
1302                 if (i >= vq->packed.vring.num)
1303                         i = 0;
1304         }
1305
1306         END_USE(vq);
1307         return -EIO;
1308 }
1309
1310 static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
1311 {
1312         struct vring_virtqueue *vq = to_vvq(_vq);
1313         u16 new, old, off_wrap, flags, wrap_counter, event_idx;
1314         bool needs_kick;
1315         union {
1316                 struct {
1317                         __le16 off_wrap;
1318                         __le16 flags;
1319                 };
1320                 u32 u32;
1321         } snapshot;
1322
1323         START_USE(vq);
1324
1325         /*
1326          * We need to expose the new flags value before checking notification
1327          * suppressions.
1328          */
1329         virtio_mb(vq->weak_barriers);
1330
1331         old = vq->packed.next_avail_idx - vq->num_added;
1332         new = vq->packed.next_avail_idx;
1333         vq->num_added = 0;
1334
1335         snapshot.u32 = *(u32 *)vq->packed.vring.device;
1336         flags = le16_to_cpu(snapshot.flags);
1337
1338         LAST_ADD_TIME_CHECK(vq);
1339         LAST_ADD_TIME_INVALID(vq);
1340
1341         if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
1342                 needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
1343                 goto out;
1344         }
1345
1346         off_wrap = le16_to_cpu(snapshot.off_wrap);
1347
1348         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1349         event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1350         if (wrap_counter != vq->packed.avail_wrap_counter)
1351                 event_idx -= vq->packed.vring.num;
1352
1353         needs_kick = vring_need_event(event_idx, new, old);
1354 out:
1355         END_USE(vq);
1356         return needs_kick;
1357 }
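
/*
 * Note on the snapshot above: the device event suppression area packs the
 * event offset into bits 0-14 of off_wrap and the event wrap counter into
 * bit 15 (VRING_PACKED_EVENT_F_WRAP_CTR), which is why event_idx is adjusted
 * by the ring size when the advertised wrap counter differs from ours.
 */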
1358
1359 static void detach_buf_packed(struct vring_virtqueue *vq,
1360                               unsigned int id, void **ctx)
1361 {
1362         struct vring_desc_state_packed *state = NULL;
1363         struct vring_packed_desc *desc;
1364         unsigned int i, curr;
1365
1366         state = &vq->packed.desc_state[id];
1367
1368         /* Clear data ptr. */
1369         state->data = NULL;
1370
1371         vq->packed.desc_extra[state->last].next = vq->free_head;
1372         vq->free_head = id;
1373         vq->vq.num_free += state->num;
1374
1375         if (unlikely(vq->use_dma_api)) {
1376                 curr = id;
1377                 for (i = 0; i < state->num; i++) {
1378                         vring_unmap_extra_packed(vq,
1379                                                  &vq->packed.desc_extra[curr]);
1380                         curr = vq->packed.desc_extra[curr].next;
1381                 }
1382         }
1383
1384         if (vq->indirect) {
1385                 u32 len;
1386
1387                 /* Free the indirect table, if any, now that it's unmapped. */
1388                 desc = state->indir_desc;
1389                 if (!desc)
1390                         return;
1391
1392                 if (vq->use_dma_api) {
1393                         len = vq->packed.desc_extra[id].len;
1394                         for (i = 0; i < len / sizeof(struct vring_packed_desc);
1395                                         i++)
1396                                 vring_unmap_desc_packed(vq, &desc[i]);
1397                 }
1398                 kfree(desc);
1399                 state->indir_desc = NULL;
1400         } else if (ctx) {
1401                 *ctx = state->indir_desc;
1402         }
1403 }
1404
1405 static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
1406                                        u16 idx, bool used_wrap_counter)
1407 {
1408         bool avail, used;
1409         u16 flags;
1410
1411         flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
1412         avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
1413         used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));
1414
1415         return avail == used && used == used_wrap_counter;
1416 }
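
/*
 * A short example of the packed-ring flag convention checked above: a
 * descriptor is "used" once the device has flipped its USED bit to match
 * AVAIL and both match the wrap counter the driver currently expects, e.g.
 * with used_wrap_counter == 1, flags with AVAIL == USED == 1 mean the entry
 * at idx has been consumed.
 */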
1417
1418 static inline bool more_used_packed(const struct vring_virtqueue *vq)
1419 {
1420         u16 last_used;
1421         u16 last_used_idx;
1422         bool used_wrap_counter;
1423
1424         last_used_idx = READ_ONCE(vq->last_used_idx);
1425         last_used = packed_last_used(last_used_idx);
1426         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1427         return is_used_desc_packed(vq, last_used, used_wrap_counter);
1428 }
1429
1430 static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
1431                                           unsigned int *len,
1432                                           void **ctx)
1433 {
1434         struct vring_virtqueue *vq = to_vvq(_vq);
1435         u16 last_used, id, last_used_idx;
1436         bool used_wrap_counter;
1437         void *ret;
1438
1439         START_USE(vq);
1440
1441         if (unlikely(vq->broken)) {
1442                 END_USE(vq);
1443                 return NULL;
1444         }
1445
1446         if (!more_used_packed(vq)) {
1447                 pr_debug("No more buffers in queue\n");
1448                 END_USE(vq);
1449                 return NULL;
1450         }
1451
1452         /* Only get used elements after they have been exposed by host. */
1453         virtio_rmb(vq->weak_barriers);
1454
1455         last_used_idx = READ_ONCE(vq->last_used_idx);
1456         used_wrap_counter = packed_used_wrap_counter(last_used_idx);
1457         last_used = packed_last_used(last_used_idx);
1458         id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
1459         *len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
1460
1461         if (unlikely(id >= vq->packed.vring.num)) {
1462                 BAD_RING(vq, "id %u out of range\n", id);
1463                 return NULL;
1464         }
1465         if (unlikely(!vq->packed.desc_state[id].data)) {
1466                 BAD_RING(vq, "id %u is not a head!\n", id);
1467                 return NULL;
1468         }
1469
1470         /* detach_buf_packed clears data, so grab it now. */
1471         ret = vq->packed.desc_state[id].data;
1472         detach_buf_packed(vq, id, ctx);
1473
1474         last_used += vq->packed.desc_state[id].num;
1475         if (unlikely(last_used >= vq->packed.vring.num)) {
1476                 last_used -= vq->packed.vring.num;
1477                 used_wrap_counter ^= 1;
1478         }
1479
1480         last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1481         WRITE_ONCE(vq->last_used_idx, last_used);
1482
1483         /*
1484          * If we expect an interrupt for the next entry, tell the host
1485          * by writing the event index, and flush out the write before
1486          * the read in the next get_buf call.
1487          */
1488         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
1489                 virtio_store_mb(vq->weak_barriers,
1490                                 &vq->packed.vring.driver->off_wrap,
1491                                 cpu_to_le16(vq->last_used_idx));
1492
1493         LAST_ADD_TIME_INVALID(vq);
1494
1495         END_USE(vq);
1496         return ret;
1497 }
1498
1499 static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
1500 {
1501         struct vring_virtqueue *vq = to_vvq(_vq);
1502
1503         if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
1504                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1505                 vq->packed.vring.driver->flags =
1506                         cpu_to_le16(vq->packed.event_flags_shadow);
1507         }
1508 }
1509
1510 static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq)
1511 {
1512         struct vring_virtqueue *vq = to_vvq(_vq);
1513
1514         START_USE(vq);
1515
1516         /*
1517          * We optimistically turn back on interrupts, then check if there is
1518          * more to do.
1519          */
1520
1521         if (vq->event) {
1522                 vq->packed.vring.driver->off_wrap =
1523                         cpu_to_le16(vq->last_used_idx);
1524                 /*
1525                  * We need to update event offset and event wrap
1526                  * counter first before updating event flags.
1527                  */
1528                 virtio_wmb(vq->weak_barriers);
1529         }
1530
1531         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1532                 vq->packed.event_flags_shadow = vq->event ?
1533                                 VRING_PACKED_EVENT_FLAG_DESC :
1534                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1535                 vq->packed.vring.driver->flags =
1536                                 cpu_to_le16(vq->packed.event_flags_shadow);
1537         }
1538
1539         END_USE(vq);
1540         return vq->last_used_idx;
1541 }
1542
1543 static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
1544 {
1545         struct vring_virtqueue *vq = to_vvq(_vq);
1546         bool wrap_counter;
1547         u16 used_idx;
1548
1549         wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
1550         used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1551
1552         return is_used_desc_packed(vq, used_idx, wrap_counter);
1553 }
1554
1555 static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
1556 {
1557         struct vring_virtqueue *vq = to_vvq(_vq);
1558         u16 used_idx, wrap_counter, last_used_idx;
1559         u16 bufs;
1560
1561         START_USE(vq);
1562
1563         /*
1564          * We optimistically turn back on interrupts, then check if there is
1565          * more to do.
1566          */
1567
1568         if (vq->event) {
1569                 /* TODO: tune this threshold */
1570                 bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
1571                 last_used_idx = READ_ONCE(vq->last_used_idx);
1572                 wrap_counter = packed_used_wrap_counter(last_used_idx);
1573
1574                 used_idx = packed_last_used(last_used_idx) + bufs;
1575                 if (used_idx >= vq->packed.vring.num) {
1576                         used_idx -= vq->packed.vring.num;
1577                         wrap_counter ^= 1;
1578                 }
1579
1580                 vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx |
1581                         (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
1582
1583                 /*
1584                  * We need to update event offset and event wrap
1585                  * counter first before updating event flags.
1586                  */
1587                 virtio_wmb(vq->weak_barriers);
1588         }
1589
1590         if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
1591                 vq->packed.event_flags_shadow = vq->event ?
1592                                 VRING_PACKED_EVENT_FLAG_DESC :
1593                                 VRING_PACKED_EVENT_FLAG_ENABLE;
1594                 vq->packed.vring.driver->flags =
1595                                 cpu_to_le16(vq->packed.event_flags_shadow);
1596         }
1597
1598         /*
1599          * We need to update event suppression structure first
1600          * before re-checking for more used buffers.
1601          */
1602         virtio_mb(vq->weak_barriers);
1603
1604         last_used_idx = READ_ONCE(vq->last_used_idx);
1605         wrap_counter = packed_used_wrap_counter(last_used_idx);
1606         used_idx = packed_last_used(last_used_idx);
1607         if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
1608                 END_USE(vq);
1609                 return false;
1610         }
1611
1612         END_USE(vq);
1613         return true;
1614 }
1615
1616 static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq)
1617 {
1618         struct vring_virtqueue *vq = to_vvq(_vq);
1619         unsigned int i;
1620         void *buf;
1621
1622         START_USE(vq);
1623
1624         for (i = 0; i < vq->packed.vring.num; i++) {
1625                 if (!vq->packed.desc_state[i].data)
1626                         continue;
1627                 /* detach_buf clears data, so grab it now. */
1628                 buf = vq->packed.desc_state[i].data;
1629                 detach_buf_packed(vq, i, NULL);
1630                 END_USE(vq);
1631                 return buf;
1632         }
1633         /* That should have freed everything. */
1634         BUG_ON(vq->vq.num_free != vq->packed.vring.num);
1635
1636         END_USE(vq);
1637         return NULL;
1638 }
1639
1640 static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq,
1641                                                        unsigned int num)
1642 {
1643         struct vring_desc_extra *desc_extra;
1644         unsigned int i;
1645
1646         desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra),
1647                                    GFP_KERNEL);
1648         if (!desc_extra)
1649                 return NULL;
1650
1651         memset(desc_extra, 0, num * sizeof(struct vring_desc_extra));
1652
1653         for (i = 0; i < num - 1; i++)
1654                 desc_extra[i].next = i + 1;
1655
1656         return desc_extra;
1657 }
1658
1659 static struct virtqueue *vring_create_virtqueue_packed(
1660         unsigned int index,
1661         unsigned int num,
1662         unsigned int vring_align,
1663         struct virtio_device *vdev,
1664         bool weak_barriers,
1665         bool may_reduce_num,
1666         bool context,
1667         bool (*notify)(struct virtqueue *),
1668         void (*callback)(struct virtqueue *),
1669         const char *name)
1670 {
1671         struct vring_virtqueue *vq;
1672         struct vring_packed_desc *ring;
1673         struct vring_packed_desc_event *driver, *device;
1674         dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
1675         size_t ring_size_in_bytes, event_size_in_bytes;
1676
1677         ring_size_in_bytes = num * sizeof(struct vring_packed_desc);
1678
1679         ring = vring_alloc_queue(vdev, ring_size_in_bytes,
1680                                  &ring_dma_addr,
1681                                  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1682         if (!ring)
1683                 goto err_ring;
1684
1685         event_size_in_bytes = sizeof(struct vring_packed_desc_event);
1686
1687         driver = vring_alloc_queue(vdev, event_size_in_bytes,
1688                                    &driver_event_dma_addr,
1689                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1690         if (!driver)
1691                 goto err_driver;
1692
1693         device = vring_alloc_queue(vdev, event_size_in_bytes,
1694                                    &device_event_dma_addr,
1695                                    GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
1696         if (!device)
1697                 goto err_device;
1698
1699         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
1700         if (!vq)
1701                 goto err_vq;
1702
1703         vq->vq.callback = callback;
1704         vq->vq.vdev = vdev;
1705         vq->vq.name = name;
1706         vq->vq.num_free = num;
1707         vq->vq.index = index;
1708         vq->we_own_ring = true;
1709         vq->notify = notify;
1710         vq->weak_barriers = weak_barriers;
1711 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
1712         vq->broken = true;
1713 #else
1714         vq->broken = false;
1715 #endif
1716         vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
1717         vq->event_triggered = false;
1718         vq->num_added = 0;
1719         vq->packed_ring = true;
1720         vq->use_dma_api = vring_use_dma_api(vdev);
1721 #ifdef DEBUG
1722         vq->in_use = false;
1723         vq->last_add_time_valid = false;
1724 #endif
1725
1726         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
1727                 !context;
1728         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
1729
1730         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
1731                 vq->weak_barriers = false;
1732
1733         vq->packed.ring_dma_addr = ring_dma_addr;
1734         vq->packed.driver_event_dma_addr = driver_event_dma_addr;
1735         vq->packed.device_event_dma_addr = device_event_dma_addr;
1736
1737         vq->packed.ring_size_in_bytes = ring_size_in_bytes;
1738         vq->packed.event_size_in_bytes = event_size_in_bytes;
1739
1740         vq->packed.vring.num = num;
1741         vq->packed.vring.desc = ring;
1742         vq->packed.vring.driver = driver;
1743         vq->packed.vring.device = device;
1744
1745         vq->packed.next_avail_idx = 0;
1746         vq->packed.avail_wrap_counter = 1;
1747         vq->packed.event_flags_shadow = 0;
1748         vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
1749
1750         vq->packed.desc_state = kmalloc_array(num,
1751                         sizeof(struct vring_desc_state_packed),
1752                         GFP_KERNEL);
1753         if (!vq->packed.desc_state)
1754                 goto err_desc_state;
1755
1756         memset(vq->packed.desc_state, 0,
1757                 num * sizeof(struct vring_desc_state_packed));
1758
1759         /* Put everything in free lists. */
1760         vq->free_head = 0;
1761
1762         vq->packed.desc_extra = vring_alloc_desc_extra(vq, num);
1763         if (!vq->packed.desc_extra)
1764                 goto err_desc_extra;
1765
1766         /* No callback?  Tell other side not to bother us. */
1767         if (!callback) {
1768                 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
1769                 vq->packed.vring.driver->flags =
1770                         cpu_to_le16(vq->packed.event_flags_shadow);
1771         }
1772
1773         spin_lock(&vdev->vqs_list_lock);
1774         list_add_tail(&vq->vq.list, &vdev->vqs);
1775         spin_unlock(&vdev->vqs_list_lock);
1776         return &vq->vq;
1777
1778 err_desc_extra:
1779         kfree(vq->packed.desc_state);
1780 err_desc_state:
1781         kfree(vq);
1782 err_vq:
1783         vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr);
1784 err_device:
1785         vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr);
1786 err_driver:
1787         vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
1788 err_ring:
1789         return NULL;
1790 }
1791
1792
1793 /*
1794  * Generic functions and exported symbols.
1795  */
1796
1797 static inline int virtqueue_add(struct virtqueue *_vq,
1798                                 struct scatterlist *sgs[],
1799                                 unsigned int total_sg,
1800                                 unsigned int out_sgs,
1801                                 unsigned int in_sgs,
1802                                 void *data,
1803                                 void *ctx,
1804                                 gfp_t gfp)
1805 {
1806         struct vring_virtqueue *vq = to_vvq(_vq);
1807
1808         return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
1809                                         out_sgs, in_sgs, data, ctx, gfp) :
1810                                  virtqueue_add_split(_vq, sgs, total_sg,
1811                                         out_sgs, in_sgs, data, ctx, gfp);
1812 }
1813
1814 /**
1815  * virtqueue_add_sgs - expose buffers to other end
1816  * @_vq: the struct virtqueue we're talking about.
1817  * @sgs: array of terminated scatterlists.
1818  * @out_sgs: the number of scatterlists readable by other side
1819  * @in_sgs: the number of scatterlists which are writable (after readable ones)
1820  * @data: the token identifying the buffer.
1821  * @gfp: how to do memory allocations (if necessary).
1822  *
1823  * Caller must ensure we don't call this with other virtqueue operations
1824  * at the same time (except where noted).
1825  *
1826  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1827  */
1828 int virtqueue_add_sgs(struct virtqueue *_vq,
1829                       struct scatterlist *sgs[],
1830                       unsigned int out_sgs,
1831                       unsigned int in_sgs,
1832                       void *data,
1833                       gfp_t gfp)
1834 {
1835         unsigned int i, total_sg = 0;
1836
1837         /* Count them first. */
1838         for (i = 0; i < out_sgs + in_sgs; i++) {
1839                 struct scatterlist *sg;
1840
1841                 for (sg = sgs[i]; sg; sg = sg_next(sg))
1842                         total_sg++;
1843         }
1844         return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
1845                              data, NULL, gfp);
1846 }
1847 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
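
/*
 * A minimal usage sketch for virtqueue_add_sgs(); the request layout and the
 * names "req", "hdr" and "status" are hypothetical: one device-readable
 * header followed by one device-writable status byte, with "req" as the
 * token later returned by virtqueue_get_buf().
 *
 *      struct scatterlist hdr_sg, status_sg, *sgs[2];
 *      int err;
 *
 *      sg_init_one(&hdr_sg, &req->hdr, sizeof(req->hdr));
 *      sg_init_one(&status_sg, &req->status, sizeof(req->status));
 *      sgs[0] = &hdr_sg;
 *      sgs[1] = &status_sg;
 *      err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *      if (!err)
 *              virtqueue_kick(vq);
 */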
1848
1849 /**
1850  * virtqueue_add_outbuf - expose output buffers to other end
1851  * @vq: the struct virtqueue we're talking about.
1852  * @sg: scatterlist (must be well-formed and terminated!)
1853  * @num: the number of entries in @sg readable by other side
1854  * @data: the token identifying the buffer.
1855  * @gfp: how to do memory allocations (if necessary).
1856  *
1857  * Caller must ensure we don't call this with other virtqueue operations
1858  * at the same time (except where noted).
1859  *
1860  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1861  */
1862 int virtqueue_add_outbuf(struct virtqueue *vq,
1863                          struct scatterlist *sg, unsigned int num,
1864                          void *data,
1865                          gfp_t gfp)
1866 {
1867         return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
1868 }
1869 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
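
/*
 * A minimal sketch of posting one device-readable buffer; the buffer "buf",
 * its length and the choice of GFP flags are illustrative:
 *
 *      struct scatterlist sg;
 *
 *      sg_init_one(&sg, buf, len);
 *      if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_KERNEL) == 0)
 *              virtqueue_kick(vq);
 */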
1870
1871 /**
1872  * virtqueue_add_inbuf - expose input buffers to other end
1873  * @vq: the struct virtqueue we're talking about.
1874  * @sg: scatterlist (must be well-formed and terminated!)
1875  * @num: the number of entries in @sg writable by other side
1876  * @data: the token identifying the buffer.
1877  * @gfp: how to do memory allocations (if necessary).
1878  *
1879  * Caller must ensure we don't call this with other virtqueue operations
1880  * at the same time (except where noted).
1881  *
1882  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1883  */
1884 int virtqueue_add_inbuf(struct virtqueue *vq,
1885                         struct scatterlist *sg, unsigned int num,
1886                         void *data,
1887                         gfp_t gfp)
1888 {
1889         return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
1890 }
1891 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
1892
1893 /**
1894  * virtqueue_add_inbuf_ctx - expose input buffers to other end
1895  * @vq: the struct virtqueue we're talking about.
1896  * @sg: scatterlist (must be well-formed and terminated!)
1897  * @num: the number of entries in @sg writable by other side
1898  * @data: the token identifying the buffer.
1899  * @ctx: extra context for the token
1900  * @gfp: how to do memory allocations (if necessary).
1901  *
1902  * Caller must ensure we don't call this with other virtqueue operations
1903  * at the same time (except where noted).
1904  *
1905  * Returns zero or a negative error (e.g. -ENOSPC, -ENOMEM, -EIO).
1906  */
1907 int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
1908                         struct scatterlist *sg, unsigned int num,
1909                         void *data,
1910                         void *ctx,
1911                         gfp_t gfp)
1912 {
1913         return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
1914 }
1915 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
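
/*
 * A sketch of the ctx round trip; the names are illustrative, and the queue
 * must have been created with per-buffer context enabled (which also
 * disables indirect descriptors).  Context attached when posting a receive
 * buffer comes back, together with the token, when the buffer is consumed:
 *
 *      virtqueue_add_inbuf_ctx(vq, &sg, 1, buf, frag_ctx, GFP_ATOMIC);
 *      ...
 *      buf = virtqueue_get_buf_ctx(vq, &len, &frag_ctx);
 *
 * virtqueue_add_inbuf() above is the same operation with a NULL ctx.
 */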
1916
1917 /**
1918  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
1919  * @_vq: the struct virtqueue
1920  *
1921  * Instead of virtqueue_kick(), you can do:
1922  *      if (virtqueue_kick_prepare(vq))
1923  *              virtqueue_notify(vq);
1924  *
1925  * This is sometimes useful because virtqueue_kick_prepare() needs
1926  * to be serialized, but the actual virtqueue_notify() call does not.
1927  */
1928 bool virtqueue_kick_prepare(struct virtqueue *_vq)
1929 {
1930         struct vring_virtqueue *vq = to_vvq(_vq);
1931
1932         return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) :
1933                                  virtqueue_kick_prepare_split(_vq);
1934 }
1935 EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
1936
1937 /**
1938  * virtqueue_notify - second half of split virtqueue_kick call.
1939  * @_vq: the struct virtqueue
1940  *
1941  * This does not need to be serialized.
1942  *
1943  * Returns false if host notify failed or queue is broken, otherwise true.
1944  */
1945 bool virtqueue_notify(struct virtqueue *_vq)
1946 {
1947         struct vring_virtqueue *vq = to_vvq(_vq);
1948
1949         if (unlikely(vq->broken))
1950                 return false;
1951
1952         /* Prod other side to tell it about changes. */
1953         if (!vq->notify(_vq)) {
1954                 vq->broken = true;
1955                 return false;
1956         }
1957         return true;
1958 }
1959 EXPORT_SYMBOL_GPL(virtqueue_notify);
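
/*
 * A sketch of the split kick; the spinlock and its flags are hypothetical.
 * virtqueue_kick_prepare() runs under the same serialization as the add/get
 * calls, while the potentially slow virtqueue_notify() can be issued after
 * the lock has been dropped:
 *
 *      bool kick;
 *
 *      spin_lock_irqsave(&vq_lock, flags);
 *      virtqueue_add_outbuf(vq, &sg, 1, tok, GFP_ATOMIC);
 *      kick = virtqueue_kick_prepare(vq);
 *      spin_unlock_irqrestore(&vq_lock, flags);
 *      if (kick)
 *              virtqueue_notify(vq);
 */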
1960
1961 /**
1962  * virtqueue_kick - update after add_buf
1963  * @vq: the struct virtqueue
1964  *
1965  * After one or more virtqueue_add_* calls, invoke this to kick
1966  * the other side.
1967  *
1968  * Caller must ensure we don't call this with other virtqueue
1969  * operations at the same time (except where noted).
1970  *
1971  * Returns false if kick failed, otherwise true.
1972  */
1973 bool virtqueue_kick(struct virtqueue *vq)
1974 {
1975         if (virtqueue_kick_prepare(vq))
1976                 return virtqueue_notify(vq);
1977         return true;
1978 }
1979 EXPORT_SYMBOL_GPL(virtqueue_kick);
1980
1981 /**
1982  * virtqueue_get_buf_ctx - get the next used buffer
1983  * @_vq: the struct virtqueue we're talking about.
1984  * @len: the length written into the buffer
1985  * @ctx: extra context for the token
1986  *
1987  * If the device wrote data into the buffer, @len will be set to the
1988  * amount written.  This means you don't need to clear the buffer
1989  * beforehand to ensure there's no data leakage in the case of short
1990  * writes.
1991  *
1992  * Caller must ensure we don't call this with other virtqueue
1993  * operations at the same time (except where noted).
1994  *
1995  * Returns NULL if there are no used buffers, or the "data" token
1996  * handed to virtqueue_add_*().
1997  */
1998 void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
1999                             void **ctx)
2000 {
2001         struct vring_virtqueue *vq = to_vvq(_vq);
2002
2003         return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) :
2004                                  virtqueue_get_buf_ctx_split(_vq, len, ctx);
2005 }
2006 EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
2007
2008 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
2009 {
2010         return virtqueue_get_buf_ctx(_vq, len, NULL);
2011 }
2012 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
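
/*
 * A typical reap loop; "token", "len" and complete_request() are
 * illustrative.  Drain every used buffer the device has returned so far,
 * usually from the virtqueue callback:
 *
 *      unsigned int len;
 *      void *token;
 *
 *      while ((token = virtqueue_get_buf(vq, &len)) != NULL)
 *              complete_request(token, len);
 */
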
2013 /**
2014  * virtqueue_disable_cb - disable callbacks
2015  * @_vq: the struct virtqueue we're talking about.
2016  *
2017  * Note that this is not necessarily synchronous, hence unreliable and only
2018  * useful as an optimization.
2019  *
2020  * Unlike other operations, this need not be serialized.
2021  */
2022 void virtqueue_disable_cb(struct virtqueue *_vq)
2023 {
2024         struct vring_virtqueue *vq = to_vvq(_vq);
2025
2026         /* If the device has already triggered an event, it won't trigger
2027          * another one: no need to disable.
2028          */
2029         if (vq->event_triggered)
2030                 return;
2031
2032         if (vq->packed_ring)
2033                 virtqueue_disable_cb_packed(_vq);
2034         else
2035                 virtqueue_disable_cb_split(_vq);
2036 }
2037 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
2038
2039 /**
2040  * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
2041  * @_vq: the struct virtqueue we're talking about.
2042  *
2043  * This re-enables callbacks; it returns the current queue state
2044  * in an opaque unsigned value. This value should later be tested by
2045  * virtqueue_poll, to detect a possible race between the driver checking for
2046  * more work and enabling callbacks.
2047  *
2048  * Caller must ensure we don't call this with other virtqueue
2049  * operations at the same time (except where noted).
2050  */
2051 unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
2052 {
2053         struct vring_virtqueue *vq = to_vvq(_vq);
2054
2055         if (vq->event_triggered)
2056                 vq->event_triggered = false;
2057
2058         return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) :
2059                                  virtqueue_enable_cb_prepare_split(_vq);
2060 }
2061 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
2062
2063 /**
2064  * virtqueue_poll - query pending used buffers
2065  * @_vq: the struct virtqueue we're talking about.
2066  * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
2067  *
2068  * Returns "true" if there are pending used buffers in the queue.
2069  *
2070  * This does not need to be serialized.
2071  */
2072 bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
2073 {
2074         struct vring_virtqueue *vq = to_vvq(_vq);
2075
2076         if (unlikely(vq->broken))
2077                 return false;
2078
2079         virtio_mb(vq->weak_barriers);
2080         return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) :
2081                                  virtqueue_poll_split(_vq, last_used_idx);
2082 }
2083 EXPORT_SYMBOL_GPL(virtqueue_poll);
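
/*
 * A sketch of the prepare/poll pairing described above; the processing loop
 * and process_used_buffers() are hypothetical.  Re-arm callbacks, then use
 * the opaque value to check whether more buffers slipped in while callbacks
 * were still off, and keep processing if so:
 *
 *      unsigned int opaque;
 *
 *      for (;;) {
 *              process_used_buffers(vq);
 *              opaque = virtqueue_enable_cb_prepare(vq);
 *              if (!virtqueue_poll(vq, opaque))
 *                      break;
 *              virtqueue_disable_cb(vq);
 *      }
 */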
2084
2085 /**
2086  * virtqueue_enable_cb - restart callbacks after disable_cb.
2087  * @_vq: the struct virtqueue we're talking about.
2088  *
2089  * This re-enables callbacks; it returns "false" if there are pending
2090  * buffers in the queue, to detect a possible race between the driver
2091  * checking for more work and enabling callbacks.
2092  *
2093  * Caller must ensure we don't call this with other virtqueue
2094  * operations at the same time (except where noted).
2095  */
2096 bool virtqueue_enable_cb(struct virtqueue *_vq)
2097 {
2098         unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq);
2099
2100         return !virtqueue_poll(_vq, last_used_idx);
2101 }
2102 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
2103
2104 /**
2105  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
2106  * @_vq: the struct virtqueue we're talking about.
2107  *
2108  * This re-enables callbacks but hints to the other side to delay
2109  * interrupts until most of the available buffers have been processed;
2110  * it returns "false" if there are many pending buffers in the queue,
2111  * to detect a possible race between the driver checking for more work
2112  * and enabling callbacks.
2113  *
2114  * Caller must ensure we don't call this with other virtqueue
2115  * operations at the same time (except where noted).
2116  */
2117 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
2118 {
2119         struct vring_virtqueue *vq = to_vvq(_vq);
2120
2121         if (vq->event_triggered)
2122                 vq->event_triggered = false;
2123
2124         return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) :
2125                                  virtqueue_enable_cb_delayed_split(_vq);
2126 }
2127 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
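
/*
 * Transmit-completion paths often prefer the delayed variant; in this
 * sketch free_old_buffers() is hypothetical.  Free what has already
 * completed, then ask for an interrupt only once roughly three quarters of
 * the in-flight buffers are done, re-checking if a race is reported:
 *
 *      free_old_buffers(vq);
 *      if (!virtqueue_enable_cb_delayed(vq))
 *              free_old_buffers(vq);
 */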
2128
2129 /**
2130  * virtqueue_detach_unused_buf - detach first unused buffer
2131  * @_vq: the struct virtqueue we're talking about.
2132  *
2133  * Returns NULL or the "data" token handed to virtqueue_add_*().
2134  * This is not valid on an active queue; it is useful only for device
2135  * shutdown.
2136  */
2137 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
2138 {
2139         struct vring_virtqueue *vq = to_vvq(_vq);
2140
2141         return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) :
2142                                  virtqueue_detach_unused_buf_split(_vq);
2143 }
2144 EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
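
/*
 * A teardown sketch; the kfree() stands in for whatever owns the buffers.
 * After the device has been reset, reclaim any buffers that were posted but
 * never used, before the transport deletes the queue:
 *
 *      while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
 *              kfree(buf);
 */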
2145
2146 static inline bool more_used(const struct vring_virtqueue *vq)
2147 {
2148         return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
2149 }
2150
2151 irqreturn_t vring_interrupt(int irq, void *_vq)
2152 {
2153         struct vring_virtqueue *vq = to_vvq(_vq);
2154
2155         if (!more_used(vq)) {
2156                 pr_debug("virtqueue interrupt with no work for %p\n", vq);
2157                 return IRQ_NONE;
2158         }
2159
2160         if (unlikely(vq->broken)) {
2161 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2162                 dev_warn_once(&vq->vq.vdev->dev,
2163                               "virtio vring IRQ raised before DRIVER_OK");
2164                 return IRQ_NONE;
2165 #else
2166                 return IRQ_HANDLED;
2167 #endif
2168         }
2169
2170         /* Just a hint for performance, so it's OK that this can be racy! */
2171         if (vq->event)
2172                 vq->event_triggered = true;
2173
2174         pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
2175         if (vq->vq.callback)
2176                 vq->vq.callback(&vq->vq);
2177
2178         return IRQ_HANDLED;
2179 }
2180 EXPORT_SYMBOL_GPL(vring_interrupt);
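
/*
 * vring_interrupt() is intended to be wired up by the transport as the
 * per-virtqueue interrupt handler; a sketch assuming an MSI-X style setup
 * where "irq" and "name" are already known:
 *
 *      err = request_irq(irq, vring_interrupt, 0, name, vq);
 */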
2181
2182 /* Only available for split ring */
2183 struct virtqueue *__vring_new_virtqueue(unsigned int index,
2184                                         struct vring vring,
2185                                         struct virtio_device *vdev,
2186                                         bool weak_barriers,
2187                                         bool context,
2188                                         bool (*notify)(struct virtqueue *),
2189                                         void (*callback)(struct virtqueue *),
2190                                         const char *name)
2191 {
2192         struct vring_virtqueue *vq;
2193
2194         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2195                 return NULL;
2196
2197         vq = kmalloc(sizeof(*vq), GFP_KERNEL);
2198         if (!vq)
2199                 return NULL;
2200
2201         vq->packed_ring = false;
2202         vq->vq.callback = callback;
2203         vq->vq.vdev = vdev;
2204         vq->vq.name = name;
2205         vq->vq.num_free = vring.num;
2206         vq->vq.index = index;
2207         vq->we_own_ring = false;
2208         vq->notify = notify;
2209         vq->weak_barriers = weak_barriers;
2210 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
2211         vq->broken = true;
2212 #else
2213         vq->broken = false;
2214 #endif
2215         vq->last_used_idx = 0;
2216         vq->event_triggered = false;
2217         vq->num_added = 0;
2218         vq->use_dma_api = vring_use_dma_api(vdev);
2219 #ifdef DEBUG
2220         vq->in_use = false;
2221         vq->last_add_time_valid = false;
2222 #endif
2223
2224         vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
2225                 !context;
2226         vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
2227
2228         if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
2229                 vq->weak_barriers = false;
2230
2231         vq->split.queue_dma_addr = 0;
2232         vq->split.queue_size_in_bytes = 0;
2233
2234         vq->split.vring = vring;
2235         vq->split.avail_flags_shadow = 0;
2236         vq->split.avail_idx_shadow = 0;
2237
2238         /* No callback?  Tell other side not to bother us. */
2239         if (!callback) {
2240                 vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
2241                 if (!vq->event)
2242                         vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
2243                                         vq->split.avail_flags_shadow);
2244         }
2245
2246         vq->split.desc_state = kmalloc_array(vring.num,
2247                         sizeof(struct vring_desc_state_split), GFP_KERNEL);
2248         if (!vq->split.desc_state)
2249                 goto err_state;
2250
2251         vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num);
2252         if (!vq->split.desc_extra)
2253                 goto err_extra;
2254
2255         /* Put everything in free lists. */
2256         vq->free_head = 0;
2257         memset(vq->split.desc_state, 0, vring.num *
2258                         sizeof(struct vring_desc_state_split));
2259
2260         spin_lock(&vdev->vqs_list_lock);
2261         list_add_tail(&vq->vq.list, &vdev->vqs);
2262         spin_unlock(&vdev->vqs_list_lock);
2263         return &vq->vq;
2264
2265 err_extra:
2266         kfree(vq->split.desc_state);
2267 err_state:
2268         kfree(vq);
2269         return NULL;
2270 }
2271 EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
2272
2273 struct virtqueue *vring_create_virtqueue(
2274         unsigned int index,
2275         unsigned int num,
2276         unsigned int vring_align,
2277         struct virtio_device *vdev,
2278         bool weak_barriers,
2279         bool may_reduce_num,
2280         bool context,
2281         bool (*notify)(struct virtqueue *),
2282         void (*callback)(struct virtqueue *),
2283         const char *name)
2284 {
2285
2286         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2287                 return vring_create_virtqueue_packed(index, num, vring_align,
2288                                 vdev, weak_barriers, may_reduce_num,
2289                                 context, notify, callback, name);
2290
2291         return vring_create_virtqueue_split(index, num, vring_align,
2292                         vdev, weak_barriers, may_reduce_num,
2293                         context, notify, callback, name);
2294 }
2295 EXPORT_SYMBOL_GPL(vring_create_virtqueue);
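
/*
 * A transport-side sketch; the queue index, size, alignment and the
 * my_notify()/my_callback() hooks are illustrative.  The three booleans are
 * weak_barriers, may_reduce_num and context:
 *
 *      vq = vring_create_virtqueue(0, 256, SMP_CACHE_BYTES, vdev,
 *                                  true, true, false,
 *                                  my_notify, my_callback, "requests");
 *      if (!vq)
 *              return -ENOMEM;
 */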
2296
2297 /* Only available for split ring */
2298 struct virtqueue *vring_new_virtqueue(unsigned int index,
2299                                       unsigned int num,
2300                                       unsigned int vring_align,
2301                                       struct virtio_device *vdev,
2302                                       bool weak_barriers,
2303                                       bool context,
2304                                       void *pages,
2305                                       bool (*notify)(struct virtqueue *vq),
2306                                       void (*callback)(struct virtqueue *vq),
2307                                       const char *name)
2308 {
2309         struct vring vring;
2310
2311         if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
2312                 return NULL;
2313
2314         vring_init(&vring, num, pages, vring_align);
2315         return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
2316                                      notify, callback, name);
2317 }
2318 EXPORT_SYMBOL_GPL(vring_new_virtqueue);
2319
2320 void vring_del_virtqueue(struct virtqueue *_vq)
2321 {
2322         struct vring_virtqueue *vq = to_vvq(_vq);
2323
2324         spin_lock(&vq->vq.vdev->vqs_list_lock);
2325         list_del(&_vq->list);
2326         spin_unlock(&vq->vq.vdev->vqs_list_lock);
2327
2328         if (vq->we_own_ring) {
2329                 if (vq->packed_ring) {
2330                         vring_free_queue(vq->vq.vdev,
2331                                          vq->packed.ring_size_in_bytes,
2332                                          vq->packed.vring.desc,
2333                                          vq->packed.ring_dma_addr);
2334
2335                         vring_free_queue(vq->vq.vdev,
2336                                          vq->packed.event_size_in_bytes,
2337                                          vq->packed.vring.driver,
2338                                          vq->packed.driver_event_dma_addr);
2339
2340                         vring_free_queue(vq->vq.vdev,
2341                                          vq->packed.event_size_in_bytes,
2342                                          vq->packed.vring.device,
2343                                          vq->packed.device_event_dma_addr);
2344
2345                         kfree(vq->packed.desc_state);
2346                         kfree(vq->packed.desc_extra);
2347                 } else {
2348                         vring_free_queue(vq->vq.vdev,
2349                                          vq->split.queue_size_in_bytes,
2350                                          vq->split.vring.desc,
2351                                          vq->split.queue_dma_addr);
2352                 }
2353         }
2354         if (!vq->packed_ring) {
2355                 kfree(vq->split.desc_state);
2356                 kfree(vq->split.desc_extra);
2357         }
2358         kfree(vq);
2359 }
2360 EXPORT_SYMBOL_GPL(vring_del_virtqueue);
2361
2362 /* Manipulates transport-specific feature bits. */
2363 void vring_transport_features(struct virtio_device *vdev)
2364 {
2365         unsigned int i;
2366
2367         for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
2368                 switch (i) {
2369                 case VIRTIO_RING_F_INDIRECT_DESC:
2370                         break;
2371                 case VIRTIO_RING_F_EVENT_IDX:
2372                         break;
2373                 case VIRTIO_F_VERSION_1:
2374                         break;
2375                 case VIRTIO_F_ACCESS_PLATFORM:
2376                         break;
2377                 case VIRTIO_F_RING_PACKED:
2378                         break;
2379                 case VIRTIO_F_ORDER_PLATFORM:
2380                         break;
2381                 default:
2382                         /* We don't understand this bit. */
2383                         __virtio_clear_bit(vdev, i);
2384                 }
2385         }
2386 }
2387 EXPORT_SYMBOL_GPL(vring_transport_features);
2388
2389 /**
2390  * virtqueue_get_vring_size - return the size of the virtqueue's vring
2391  * @_vq: the struct virtqueue containing the vring of interest.
2392  *
2393  * Returns the size of the vring.  This is mainly used for boasting to
2394  * userspace.  Unlike other operations, this need not be serialized.
2395  */
2396 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
2397 {
2398
2399         struct vring_virtqueue *vq = to_vvq(_vq);
2400
2401         return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
2402 }
2403 EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
2404
2405 bool virtqueue_is_broken(struct virtqueue *_vq)
2406 {
2407         struct vring_virtqueue *vq = to_vvq(_vq);
2408
2409         return READ_ONCE(vq->broken);
2410 }
2411 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
2412
2413 /*
2414  * This should prevent the device from being used, allowing drivers to
2415  * recover.  You may need to grab appropriate locks to flush.
2416  */
2417 void virtio_break_device(struct virtio_device *dev)
2418 {
2419         struct virtqueue *_vq;
2420
2421         spin_lock(&dev->vqs_list_lock);
2422         list_for_each_entry(_vq, &dev->vqs, list) {
2423                 struct vring_virtqueue *vq = to_vvq(_vq);
2424
2425                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2426                 WRITE_ONCE(vq->broken, true);
2427         }
2428         spin_unlock(&dev->vqs_list_lock);
2429 }
2430 EXPORT_SYMBOL_GPL(virtio_break_device);
2431
2432 /*
2433  * This should allow the device to be used by the driver. You may
2434  * need to grab appropriate locks to flush the write to
2435  * vq->broken. This should only be used in specific cases, e.g.
2436  * probing and restoring. This function should only be called by the
2437  * core, not directly by the driver.
2438  */
2439 void __virtio_unbreak_device(struct virtio_device *dev)
2440 {
2441         struct virtqueue *_vq;
2442
2443         spin_lock(&dev->vqs_list_lock);
2444         list_for_each_entry(_vq, &dev->vqs, list) {
2445                 struct vring_virtqueue *vq = to_vvq(_vq);
2446
2447                 /* Pairs with READ_ONCE() in virtqueue_is_broken(). */
2448                 WRITE_ONCE(vq->broken, false);
2449         }
2450         spin_unlock(&dev->vqs_list_lock);
2451 }
2452 EXPORT_SYMBOL_GPL(__virtio_unbreak_device);
2453
2454 dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
2455 {
2456         struct vring_virtqueue *vq = to_vvq(_vq);
2457
2458         BUG_ON(!vq->we_own_ring);
2459
2460         if (vq->packed_ring)
2461                 return vq->packed.ring_dma_addr;
2462
2463         return vq->split.queue_dma_addr;
2464 }
2465 EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
2466
2467 dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
2468 {
2469         struct vring_virtqueue *vq = to_vvq(_vq);
2470
2471         BUG_ON(!vq->we_own_ring);
2472
2473         if (vq->packed_ring)
2474                 return vq->packed.driver_event_dma_addr;
2475
2476         return vq->split.queue_dma_addr +
2477                 ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
2478 }
2479 EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
2480
2481 dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
2482 {
2483         struct vring_virtqueue *vq = to_vvq(_vq);
2484
2485         BUG_ON(!vq->we_own_ring);
2486
2487         if (vq->packed_ring)
2488                 return vq->packed.device_event_dma_addr;
2489
2490         return vq->split.queue_dma_addr +
2491                 ((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
2492 }
2493 EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
2494
2495 /* Only available for split ring */
2496 const struct vring *virtqueue_get_vring(struct virtqueue *vq)
2497 {
2498         return &to_vvq(vq)->split.vring;
2499 }
2500 EXPORT_SYMBOL_GPL(virtqueue_get_vring);
2501
2502 MODULE_LICENSE("GPL");