GNU Linux-libre 5.4.257-gnu1
[releases.git] / fs / fuse / virtio_fs.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * virtio-fs: Virtio Filesystem
4  * Copyright (C) 2018 Red Hat, Inc.
5  */
6
7 #include <linux/fs.h>
8 #include <linux/module.h>
9 #include <linux/virtio.h>
10 #include <linux/virtio_fs.h>
11 #include <linux/delay.h>
12 #include <linux/fs_context.h>
13 #include <linux/highmem.h>
14 #include "fuse_i.h"
15
16 /* List of virtio-fs device instances and a lock for the list. Also provides
17  * mutual exclusion in device removal and mounting path
18  */
19 static DEFINE_MUTEX(virtio_fs_mutex);
20 static LIST_HEAD(virtio_fs_instances);
21
/* Virtqueue indices: one hiprio queue (FUSE_FORGET traffic) at index 0,
 * followed by the request queue(s) starting at VQ_REQUEST.
 */
enum {
	VQ_HIPRIO,
	VQ_REQUEST
};
26
/* Per-virtqueue state */
struct virtio_fs_vq {
	spinlock_t lock;
	struct virtqueue *vq;     /* protected by ->lock */
	struct work_struct done_work;	/* processes completed buffers */
	struct list_head queued_reqs;	/* requests waiting for vq space */
	struct list_head end_reqs;      /* End these requests */
	struct delayed_work dispatch_work; /* retries queued_reqs, ends end_reqs */
	struct fuse_dev *fud;
	bool connected;		/* cleared when the device is being removed */
	long in_flight;		/* submitted but not yet completed requests */
	char name[24];		/* e.g. "hiprio" or "requests.0" */
} ____cacheline_aligned_in_smp;
40
/* A virtio-fs device instance */
struct virtio_fs {
	struct kref refcount;	/* released via virtio_fs_put() */
	struct list_head list;    /* on virtio_fs_instances */
	char *tag;		/* filesystem name read from device config */
	struct virtio_fs_vq *vqs;	/* array of nvqs per-queue states */
	unsigned int nvqs;               /* number of virtqueues */
	unsigned int num_request_queues; /* number of request queues */
};
50
/* Wire format of a FUSE_FORGET request; ih and arg are sent to the
 * device as a single scatter-gather element (see sg_init_one() users).
 */
struct virtio_fs_forget {
	struct fuse_in_header ih;
	struct fuse_forget_in arg;
	/* This request can be temporarily queued on virt queue */
	struct list_head list;
};
57
/* Deferred-completion context for requests whose args->may_block is set */
struct virtio_fs_req_work {
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq;
	struct work_struct done_work;
};
63
64 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
65                                  struct fuse_req *req, bool in_flight);
66
67 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
68 {
69         struct virtio_fs *fs = vq->vdev->priv;
70
71         return &fs->vqs[vq->index];
72 }
73
74 static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
75 {
76         return &vq_to_fsvq(vq)->fud->pq;
77 }
78
79 /* Should be called with fsvq->lock held. */
80 static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
81 {
82         fsvq->in_flight++;
83 }
84
85 /* Should be called with fsvq->lock held. */
86 static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
87 {
88         WARN_ON(fsvq->in_flight <= 0);
89         fsvq->in_flight--;
90 }
91
92 static void release_virtio_fs_obj(struct kref *ref)
93 {
94         struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
95
96         kfree(vfs->vqs);
97         kfree(vfs);
98 }
99
/* Make sure virtiofs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
	/* Last put frees fs via release_virtio_fs_obj() */
	kref_put(&fs->refcount, release_virtio_fs_obj);
}
105
106 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
107 {
108         struct virtio_fs *vfs = fiq->priv;
109
110         mutex_lock(&virtio_fs_mutex);
111         virtio_fs_put(vfs);
112         mutex_unlock(&virtio_fs_mutex);
113 }
114
115 static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
116 {
117         WARN_ON(fsvq->in_flight < 0);
118
119         /* Wait for in flight requests to finish.*/
120         while (1) {
121                 spin_lock(&fsvq->lock);
122                 if (!fsvq->in_flight) {
123                         spin_unlock(&fsvq->lock);
124                         break;
125                 }
126                 spin_unlock(&fsvq->lock);
127                 /* TODO use completion instead of timeout */
128                 usleep_range(1000, 2000);
129         }
130
131         flush_work(&fsvq->done_work);
132         flush_delayed_work(&fsvq->dispatch_work);
133 }
134
135 static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
136 {
137         struct virtio_fs_vq *fsvq;
138         int i;
139
140         for (i = 0; i < fs->nvqs; i++) {
141                 fsvq = &fs->vqs[i];
142                 virtio_fs_drain_queue(fsvq);
143         }
144 }
145
146 static void virtio_fs_start_all_queues(struct virtio_fs *fs)
147 {
148         struct virtio_fs_vq *fsvq;
149         int i;
150
151         for (i = 0; i < fs->nvqs; i++) {
152                 fsvq = &fs->vqs[i];
153                 spin_lock(&fsvq->lock);
154                 fsvq->connected = true;
155                 spin_unlock(&fsvq->lock);
156         }
157 }
158
159 /* Add a new instance to the list or return -EEXIST if tag name exists*/
160 static int virtio_fs_add_instance(struct virtio_fs *fs)
161 {
162         struct virtio_fs *fs2;
163         bool duplicate = false;
164
165         mutex_lock(&virtio_fs_mutex);
166
167         list_for_each_entry(fs2, &virtio_fs_instances, list) {
168                 if (strcmp(fs->tag, fs2->tag) == 0)
169                         duplicate = true;
170         }
171
172         if (!duplicate)
173                 list_add_tail(&fs->list, &virtio_fs_instances);
174
175         mutex_unlock(&virtio_fs_mutex);
176
177         if (duplicate)
178                 return -EEXIST;
179         return 0;
180 }
181
182 /* Return the virtio_fs with a given tag, or NULL */
183 static struct virtio_fs *virtio_fs_find_instance(const char *tag)
184 {
185         struct virtio_fs *fs;
186
187         mutex_lock(&virtio_fs_mutex);
188
189         list_for_each_entry(fs, &virtio_fs_instances, list) {
190                 if (strcmp(fs->tag, tag) == 0) {
191                         kref_get(&fs->refcount);
192                         goto found;
193                 }
194         }
195
196         fs = NULL; /* not found */
197
198 found:
199         mutex_unlock(&virtio_fs_mutex);
200
201         return fs;
202 }
203
204 static void virtio_fs_free_devs(struct virtio_fs *fs)
205 {
206         unsigned int i;
207
208         for (i = 0; i < fs->nvqs; i++) {
209                 struct virtio_fs_vq *fsvq = &fs->vqs[i];
210
211                 if (!fsvq->fud)
212                         continue;
213
214                 fuse_dev_free(fsvq->fud);
215                 fsvq->fud = NULL;
216         }
217 }
218
/* Read filesystem name from virtio config into fs->tag (devm-allocated,
 * freed automatically with the device; not a plain kfree() allocation).
 * Returns -EINVAL for an empty tag, -ENOMEM on allocation failure.
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
	char *end;
	size_t len;

	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
			   &tag_buf, sizeof(tag_buf));
	end = memchr(tag_buf, '\0', sizeof(tag_buf));
	if (end == tag_buf)
		return -EINVAL; /* empty tag */
	if (!end)
		end = &tag_buf[sizeof(tag_buf)]; /* tag fills field, no NUL */

	len = end - tag_buf;
	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
	if (!fs->tag)
		return -ENOMEM;
	memcpy(fs->tag, tag_buf, len);
	fs->tag[len] = '\0';
	return 0;
}
242
/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct virtqueue *vq = fsvq->vq;

	/* Free completed FUSE_FORGET requests */
	spin_lock(&fsvq->lock);
	do {
		unsigned int len;
		void *req;

		/* Suppress callbacks while we drain the used ring */
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			/* Forgets have no reply payload; just free the buffer */
			kfree(req);
			dec_in_flight_req(fsvq);
		}
		/* enable_cb returning false means more buffers raced in:
		 * loop again rather than miss a completion.
		 */
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);
}
265
/* Delayed work for a request queue: first end any requests parked on
 * end_reqs, then retry submitting requests parked on queued_reqs.
 */
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
	struct fuse_req *req;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct fuse_conn *fc = fsvq->fud->fc;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	/* Phase 1: complete requests queued for (error) termination */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
					       list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			break;
		}

		list_del_init(&req->list);
		/* End the request outside fsvq->lock */
		spin_unlock(&fsvq->lock);
		fuse_request_end(fc, req);
	}

	/* Dispatch pending requests */
	while (1) {
		spin_lock(&fsvq->lock);
		req = list_first_entry_or_null(&fsvq->queued_reqs,
					       struct fuse_req, list);
		if (!req) {
			spin_unlock(&fsvq->lock);
			return;
		}
		list_del_init(&req->list);
		spin_unlock(&fsvq->lock);

		/* in_flight=true: this request is already counted */
		ret = virtio_fs_enqueue_req(fsvq, req, true);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				/* Still no room; park it and retry in ~1ms */
				spin_lock(&fsvq->lock);
				list_add_tail(&req->list, &fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						      msecs_to_jiffies(1));
				spin_unlock(&fsvq->lock);
				return;
			}
			/* Hard failure: report the error to the caller */
			req->out.h.error = ret;
			spin_lock(&fsvq->lock);
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
			       ret);
			fuse_request_end(fc, req);
		}
	}
}
321
/* Delayed work for the hiprio queue: retry FUSE_FORGET requests that
 * previously failed with -ENOMEM/-ENOSPC, dropping them if the device
 * has been disconnected in the meantime.
 */
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
	struct virtio_fs_forget *forget;
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 dispatch_work.work);
	struct virtqueue *vq = fsvq->vq;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	bool notify;
	int ret;

	pr_debug("virtio-fs: worker %s called.\n", __func__);
	while (1) {
		spin_lock(&fsvq->lock);
		forget = list_first_entry_or_null(&fsvq->queued_reqs,
					struct virtio_fs_forget, list);
		if (!forget) {
			spin_unlock(&fsvq->lock);
			return;
		}

		list_del(&forget->list);
		/* Device going away: forgets are fire-and-forget, just drop */
		if (!fsvq->connected) {
			dec_in_flight_req(fsvq);
			spin_unlock(&fsvq->lock);
			kfree(forget);
			continue;
		}

		/* Header + args travel as a single sg element */
		sg_init_one(&sg, forget, sizeof(*forget));

		/* Enqueue the request */
		dev_dbg(&vq->vdev->dev, "%s\n", __func__);
		ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
		if (ret < 0) {
			if (ret == -ENOMEM || ret == -ENOSPC) {
				/* Transient: requeue and retry in ~1ms */
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
					 ret);
				list_add_tail(&forget->list,
						&fsvq->queued_reqs);
				schedule_delayed_work(&fsvq->dispatch_work,
						msecs_to_jiffies(1));
			} else {
				/* Permanent failure: give up on this forget */
				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
					 ret);
				dec_in_flight_req(fsvq);
				kfree(forget);
			}
			spin_unlock(&fsvq->lock);
			return;
		}

		/* Kick outside the lock to keep the critical section short */
		notify = virtqueue_kick_prepare(vq);
		spin_unlock(&fsvq->lock);

		if (notify)
			virtqueue_notify(vq);
		pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
			 __func__);
	}
}
383
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	unsigned int offset = 0;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int len;
	unsigned int i;

	/* Page-backed args are mapped directly; only bounce the rest */
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	/* One buffer holds the input args followed by room for output args */
	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
	      fuse_len_args(num_out, args->out_args);

	/* NOTE(review): GFP_ATOMIC suggests atomic-context callers —
	 * confirm against virtio_fs_enqueue_req() usage.
	 */
	req->argbuf = kmalloc(len, GFP_ATOMIC);
	if (!req->argbuf)
		return -ENOMEM;

	/* Only inputs are copied now; outputs are read back on completion */
	for (i = 0; i < num_in; i++) {
		memcpy(req->argbuf + offset,
		       args->in_args[i].value,
		       args->in_args[i].size);
		offset += args->in_args[i].size;
	}

	return 0;
}
412
/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
	unsigned int remaining;
	unsigned int offset;
	unsigned int num_in;
	unsigned int num_out;
	unsigned int i;

	/* Bytes the server produced, excluding the fuse_out_header */
	remaining = req->out.h.len - sizeof(req->out.h);
	num_in = args->in_numargs - args->in_pages;
	num_out = args->out_numargs - args->out_pages;
	/* Output args sit after the input args in the shared argbuf */
	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

	for (i = 0; i < num_out; i++) {
		unsigned int argsize = args->out_args[i].size;

		/* Last arg may be variable length: clamp to what is left */
		if (args->out_argvar &&
		    i == args->out_numargs - 1 &&
		    argsize > remaining) {
			argsize = remaining;
		}

		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
		offset += argsize;

		/* Keep 'remaining' tracking only the final arg's budget */
		if (i != args->out_numargs - 1)
			remaining -= argsize;
	}

	/* Store the actual size of the variable-length arg */
	if (args->out_argvar)
		args->out_args[args->out_numargs - 1].size = remaining;

	kfree(req->argbuf);
	req->argbuf = NULL;
}
450
/* Complete one request: copy out reply args, zero unused page tails,
 * end the fuse request, and drop it from the in-flight count.
 */
static void virtio_fs_request_complete(struct fuse_req *req,
				       struct virtio_fs_vq *fsvq)
{
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct fuse_conn *fc = fsvq->fud->fc;
	struct fuse_args *args;
	struct fuse_args_pages *ap;
	unsigned int len, i, thislen;
	struct page *page;

	/*
	 * TODO verify that server properly follows FUSE protocol
	 * (oh.uniq, oh.len)
	 */
	args = req->args;
	copy_args_from_argbuf(args, req);

	/* Zero any page region beyond what the server actually returned */
	if (args->out_pages && args->page_zeroing) {
		len = args->out_args[args->out_numargs - 1].size;
		ap = container_of(args, typeof(*ap), args);
		for (i = 0; i < ap->num_pages; i++) {
			thislen = ap->descs[i].length;
			if (len < thislen) {
				WARN_ON(ap->descs[i].offset);
				page = ap->pages[i];
				zero_user_segment(page, len, thislen);
				len = 0;
			} else {
				len -= thislen;
			}
		}
	}

	spin_lock(&fpq->lock);
	clear_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);

	fuse_request_end(fc, req);
	/* Decrement last: queue draining treats in_flight==0 as done */
	spin_lock(&fsvq->lock);
	dec_in_flight_req(fsvq);
	spin_unlock(&fsvq->lock);
}
494
495 static void virtio_fs_complete_req_work(struct work_struct *work)
496 {
497         struct virtio_fs_req_work *w =
498                 container_of(work, typeof(*w), done_work);
499
500         virtio_fs_request_complete(w->req, w->fsvq);
501         kfree(w);
502 }
503
/* Done-work for a request queue: harvest completed buffers off the
 * virtqueue, then complete each request (inline, or via a worker when
 * the request may block).
 */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
						 done_work);
	struct fuse_pqueue *fpq = &fsvq->fud->pq;
	struct virtqueue *vq = fsvq->vq;
	struct fuse_req *req;
	struct fuse_req *next;
	unsigned int len;
	LIST_HEAD(reqs);

	/* Collect completed requests off the virtqueue */
	spin_lock(&fsvq->lock);
	do {
		virtqueue_disable_cb(vq);

		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
			/* fpq->lock guards req->list membership */
			spin_lock(&fpq->lock);
			list_move_tail(&req->list, &reqs);
			spin_unlock(&fpq->lock);
		}
		/* Loop again if buffers completed while callbacks were off */
	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
	spin_unlock(&fsvq->lock);

	/* End requests */
	list_for_each_entry_safe(req, next, &reqs, list) {
		list_del_init(&req->list);

		/* blocking async request completes in a worker context */
		if (req->args->may_block) {
			struct virtio_fs_req_work *w;

			/* __GFP_NOFAIL: a completion must never be lost */
			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
			w->fsvq = fsvq;
			w->req = req;
			schedule_work(&w->done_work);
		} else {
			virtio_fs_request_complete(req, fsvq);
		}
	}
}
546
547 /* Virtqueue interrupt handler */
548 static void virtio_fs_vq_done(struct virtqueue *vq)
549 {
550         struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
551
552         dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
553
554         schedule_work(&fsvq->done_work);
555 }
556
557 /* Initialize virtqueues */
558 static int virtio_fs_setup_vqs(struct virtio_device *vdev,
559                                struct virtio_fs *fs)
560 {
561         struct virtqueue **vqs;
562         vq_callback_t **callbacks;
563         const char **names;
564         unsigned int i;
565         int ret = 0;
566
567         virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
568                      &fs->num_request_queues);
569         if (fs->num_request_queues == 0)
570                 return -EINVAL;
571
572         fs->nvqs = 1 + fs->num_request_queues;
573         fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
574         if (!fs->vqs)
575                 return -ENOMEM;
576
577         vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
578         callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
579                                         GFP_KERNEL);
580         names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
581         if (!vqs || !callbacks || !names) {
582                 ret = -ENOMEM;
583                 goto out;
584         }
585
586         callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
587         snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
588                         "hiprio");
589         names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
590         INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
591         INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
592         INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
593         INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
594                         virtio_fs_hiprio_dispatch_work);
595         spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
596
597         /* Initialize the requests virtqueues */
598         for (i = VQ_REQUEST; i < fs->nvqs; i++) {
599                 spin_lock_init(&fs->vqs[i].lock);
600                 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
601                 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
602                                   virtio_fs_request_dispatch_work);
603                 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
604                 INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
605                 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
606                          "requests.%u", i - VQ_REQUEST);
607                 callbacks[i] = virtio_fs_vq_done;
608                 names[i] = fs->vqs[i].name;
609         }
610
611         ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
612         if (ret < 0)
613                 goto out;
614
615         for (i = 0; i < fs->nvqs; i++)
616                 fs->vqs[i].vq = vqs[i];
617
618         virtio_fs_start_all_queues(fs);
619 out:
620         kfree(names);
621         kfree(callbacks);
622         kfree(vqs);
623         if (ret)
624                 kfree(fs->vqs);
625         return ret;
626 }
627
/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
				  struct virtio_fs *fs)
{
	/* fs->vqs[i].vq pointers dangle after this; fs->vqs itself is
	 * freed by the callers (probe error path / release).
	 */
	vdev->config->del_vqs(vdev);
}
634
/* Probe a newly plugged virtio-fs device: read its tag, set up the
 * virtqueues, bring it online, and register it on the instances list.
 */
static int virtio_fs_probe(struct virtio_device *vdev)
{
	struct virtio_fs *fs;
	int ret;

	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
	if (!fs)
		return -ENOMEM;
	/* Initial reference; dropped from virtio_fs_remove() */
	kref_init(&fs->refcount);
	vdev->priv = fs;

	ret = virtio_fs_read_tag(vdev, fs);
	if (ret < 0)
		goto out;

	ret = virtio_fs_setup_vqs(vdev, fs);
	if (ret < 0)
		goto out;

	/* TODO vq affinity */

	/* Bring the device online in case the filesystem is mounted and
	 * requests need to be sent before we return.
	 */
	virtio_device_ready(vdev);

	/* -EEXIST if another device already registered this tag */
	ret = virtio_fs_add_instance(fs);
	if (ret < 0)
		goto out_vqs;

	return 0;

out_vqs:
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);
	kfree(fs->vqs);

out:
	vdev->priv = NULL;
	kfree(fs);
	return ret;
}
677
678 static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
679 {
680         struct virtio_fs_vq *fsvq;
681         int i;
682
683         for (i = 0; i < fs->nvqs; i++) {
684                 fsvq = &fs->vqs[i];
685                 spin_lock(&fsvq->lock);
686                 fsvq->connected = false;
687                 spin_unlock(&fsvq->lock);
688         }
689 }
690
/* Tear down an unplugged device; virtio_fs_mutex excludes racing mounts */
static void virtio_fs_remove(struct virtio_device *vdev)
{
	struct virtio_fs *fs = vdev->priv;

	mutex_lock(&virtio_fs_mutex);
	/* This device is going away. No one should get new reference */
	list_del_init(&fs->list);
	/* Stop accepting requests, then wait for in-flight ones to finish */
	virtio_fs_stop_all_queues(fs);
	virtio_fs_drain_all_queues(fs);
	vdev->config->reset(vdev);
	virtio_fs_cleanup_vqs(vdev, fs);

	vdev->priv = NULL;
	/* Put device reference on virtio_fs object */
	virtio_fs_put(fs);
	mutex_unlock(&virtio_fs_mutex);
}
708
709 #ifdef CONFIG_PM_SLEEP
/* PM freeze hook: state save is unimplemented, so refuse to suspend */
static int virtio_fs_freeze(struct virtio_device *vdev)
{
	/* TODO need to save state here */
	pr_warn("virtio-fs: suspend/resume not yet supported\n");
	return -EOPNOTSUPP;
}
716
/* PM restore hook: nothing to restore yet (freeze always fails anyway) */
static int virtio_fs_restore(struct virtio_device *vdev)
{
	 /* TODO need to restore state here */
	return 0;
}
722 #endif /* CONFIG_PM_SLEEP */
723
724 const static struct virtio_device_id id_table[] = {
725         { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
726         {},
727 };
728
729 const static unsigned int feature_table[] = {};
730
/* Virtio driver registration glue for the virtio-fs device */
static struct virtio_driver virtio_fs_driver = {
	.driver.name		= KBUILD_MODNAME,
	.driver.owner		= THIS_MODULE,
	.id_table		= id_table,
	.feature_table		= feature_table,
	.feature_table_size	= ARRAY_SIZE(feature_table),
	.probe			= virtio_fs_probe,
	.remove			= virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
	.freeze			= virtio_fs_freeze,
	.restore		= virtio_fs_restore,
#endif
};
744
/* Dequeue one FUSE_FORGET from the input queue and submit it on the
 * hiprio virtqueue.  Called with fiq->lock held; releases it.  Forgets
 * have no reply: the buffer is freed on completion by
 * virtio_fs_hiprio_done_work(), or requeued for retry on -ENOMEM/-ENOSPC.
 */
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	struct fuse_forget_link *link;
	struct virtio_fs_forget *forget;
	struct scatterlist sg;
	struct scatterlist *sgs[] = {&sg};
	struct virtio_fs *fs;
	struct virtqueue *vq;
	struct virtio_fs_vq *fsvq;
	bool notify;
	u64 unique;
	int ret;

	link = fuse_dequeue_forget(fiq, 1, NULL);
	unique = fuse_get_unique(fiq);

	fs = fiq->priv;
	fsvq = &fs->vqs[VQ_HIPRIO];
	spin_unlock(&fiq->lock);

	/* Allocate a buffer for the request */
	/* __GFP_NOFAIL: a forget must not be lost to an OOM condition */
	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

	forget->ih = (struct fuse_in_header){
		.opcode = FUSE_FORGET,
		.nodeid = link->forget_one.nodeid,
		.unique = unique,
		.len = sizeof(*forget),
	};
	forget->arg = (struct fuse_forget_in){
		.nlookup = link->forget_one.nlookup,
	};

	/* Header + args are sent as a single sg element */
	sg_init_one(&sg, forget, sizeof(*forget));

	/* Enqueue the request */
	spin_lock(&fsvq->lock);

	/* Device is being removed: silently drop the forget */
	if (!fsvq->connected) {
		kfree(forget);
		spin_unlock(&fsvq->lock);
		goto out;
	}

	vq = fsvq->vq;
	dev_dbg(&vq->vdev->dev, "%s\n", __func__);

	ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
	if (ret < 0) {
		if (ret == -ENOMEM || ret == -ENOSPC) {
			/* Queue full: hand off to the hiprio dispatch worker.
			 * Counted in-flight so draining waits for it.
			 */
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
				 ret);
			list_add_tail(&forget->list, &fsvq->queued_reqs);
			schedule_delayed_work(&fsvq->dispatch_work,
					msecs_to_jiffies(1));
			inc_in_flight_req(fsvq);
		} else {
			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
				 ret);
			kfree(forget);
		}
		spin_unlock(&fsvq->lock);
		goto out;
	}

	inc_in_flight_req(fsvq);
	notify = virtqueue_kick_prepare(vq);

	spin_unlock(&fsvq->lock);

	if (notify)
		virtqueue_notify(vq);
out:
	kfree(link);
}
821
/* Interrupt requests are not supported yet; this just drops fiq->lock */
static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	/*
	 * TODO interrupts.
	 *
	 * Normal fs operations on a local filesystems aren't interruptible.
	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
	 * with shared lock between host and guest.
	 */
	spin_unlock(&fiq->lock);
}
834
/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
	struct fuse_args *args = req->args;
	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
	unsigned int total_sgs = 1 /* fuse_in_header */;

	/* All non-page input args share one sg (bounced via argbuf) */
	if (args->in_numargs - args->in_pages)
		total_sgs += 1;

	if (args->in_pages)
		total_sgs += ap->num_pages;

	/* No-reply requests need no device-writable elements */
	if (!test_bit(FR_ISREPLY, &req->flags))
		return total_sgs;

	total_sgs += 1 /* fuse_out_header */;

	/* Likewise one sg for all non-page output args */
	if (args->out_numargs - args->out_pages)
		total_sgs += 1;

	if (args->out_pages)
		total_sgs += ap->num_pages;

	return total_sgs;
}
861
/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
				       struct page **pages,
				       struct fuse_page_desc *page_descs,
				       unsigned int num_pages,
				       unsigned int total_len)
{
	unsigned int i;
	unsigned int this_len;

	/* Stop once total_len bytes are covered, even if pages remain */
	for (i = 0; i < num_pages && total_len; i++) {
		sg_init_table(&sg[i], 1);
		/* A page may carry less than its descriptor length */
		this_len =  min(page_descs[i].length, total_len);
		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
		total_len -= this_len;
	}

	return i;
}
881
/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
				      struct fuse_req *req,
				      struct fuse_arg *args,
				      unsigned int numargs,
				      bool argpages,
				      void *argbuf,
				      unsigned int *len_used)
{
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
	unsigned int total_sgs = 0;
	unsigned int len;

	/* Non-page args were bounced into argbuf as one contiguous chunk */
	len = fuse_len_args(numargs - argpages, args);
	if (len)
		sg_init_one(&sg[total_sgs++], argbuf, len);

	/* Page-backed data follows; its size lives in the last arg */
	if (argpages)
		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
						ap->pages, ap->descs,
						ap->num_pages,
						args[numargs - 1].size);

	/* Report how many argbuf bytes this direction consumed */
	if (len_used)
		*len_used = len;

	return total_sgs;
}
910
/*
 * Add a request to a virtqueue and kick the device.
 *
 * @in_flight: true if the caller already accounted this request in
 *             fsvq->in_flight (resubmission path); false for a new one.
 *
 * Returns 0 on success; -ENOMEM/-ENOSPC when the virtqueue is full so
 * the caller may requeue and retry, -ENOTCONN once the queue has been
 * disconnected, or another negative errno.
 */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
                                 struct fuse_req *req, bool in_flight)
{
        /* requests need at least 4 elements */
        struct scatterlist *stack_sgs[6];
        struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
        struct scatterlist **sgs = stack_sgs;
        struct scatterlist *sg = stack_sg;
        struct virtqueue *vq;
        struct fuse_args *args = req->args;
        unsigned int argbuf_used = 0;
        unsigned int out_sgs = 0;
        unsigned int in_sgs = 0;
        unsigned int total_sgs;
        unsigned int i;
        int ret;
        bool notify;
        struct fuse_pqueue *fpq;

        /* Does the sglist fit on the stack? */
        total_sgs = sg_count_fuse_req(req);
        if (total_sgs > ARRAY_SIZE(stack_sgs)) {
                /* GFP_ATOMIC: submission may run in atomic context (see
                 * the fc->bg_lock note in virtio_fs_wake_pending_and_unlock)
                 */
                sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
                sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
                if (!sgs || !sg) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        /* Use a bounce buffer since stack args cannot be mapped */
        ret = copy_args_to_argbuf(req);
        if (ret < 0)
                goto out;

        /* Request elements: in-header first, then the in-args */
        sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
        out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
                                     (struct fuse_arg *)args->in_args,
                                     args->in_numargs, args->in_pages,
                                     req->argbuf, &argbuf_used);

        /* Reply elements: only when the device is expected to answer */
        if (test_bit(FR_ISREPLY, &req->flags)) {
                sg_init_one(&sg[out_sgs + in_sgs++],
                            &req->out.h, sizeof(req->out.h));
                in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
                                            args->out_args, args->out_numargs,
                                            args->out_pages,
                                            req->argbuf + argbuf_used, NULL);
        }

        /* Element count must agree with sg_count_fuse_req() */
        WARN_ON(out_sgs + in_sgs != total_sgs);

        for (i = 0; i < total_sgs; i++)
                sgs[i] = &sg[i];

        spin_lock(&fsvq->lock);

        if (!fsvq->connected) {
                spin_unlock(&fsvq->lock);
                ret = -ENOTCONN;
                goto out;
        }

        vq = fsvq->vq;
        ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
        if (ret < 0) {
                spin_unlock(&fsvq->lock);
                goto out;
        }

        /* Request successfully sent. */
        fpq = &fsvq->fud->pq;
        spin_lock(&fpq->lock);
        list_add_tail(&req->list, fpq->processing);
        spin_unlock(&fpq->lock);
        set_bit(FR_SENT, &req->flags);
        /* matches barrier in request_wait_answer() */
        smp_mb__after_atomic();

        /* Resubmitted requests were already counted in in_flight */
        if (!in_flight)
                inc_in_flight_req(fsvq);
        notify = virtqueue_kick_prepare(vq);

        spin_unlock(&fsvq->lock);

        if (notify)
                virtqueue_notify(vq);

out:
        /* On failure, the bounce buffer is ours to free */
        if (ret < 0 && req->argbuf) {
                kfree(req->argbuf);
                req->argbuf = NULL;
        }
        if (sgs != stack_sgs) {
                kfree(sgs);
                kfree(sg);
        }

        return ret;
}
1014
/*
 * Submit the pending fuse request to the request virtqueue. Called with
 * fiq->lock held; releases it before talking to the device. On a full
 * virtqueue the request is parked on queued_reqs for the dispatch
 * worker; on any other error it is parked on end_reqs for completion.
 */
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
        struct virtio_fs *fs;
        struct fuse_req *req;
        struct virtio_fs_vq *fsvq;
        int ret;

        /* Exactly one request is expected on fiq->pending here */
        WARN_ON(list_empty(&fiq->pending));
        req = list_last_entry(&fiq->pending, struct fuse_req, list);
        clear_bit(FR_PENDING, &req->flags);
        list_del_init(&req->list);
        WARN_ON(!list_empty(&fiq->pending));
        spin_unlock(&fiq->lock);

        fs = fiq->priv;

        pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
                  __func__, req->in.h.opcode, req->in.h.unique,
                 req->in.h.nodeid, req->in.h.len,
                 fuse_len_args(req->args->out_numargs, req->args->out_args));

        fsvq = &fs->vqs[queue_id];
        ret = virtio_fs_enqueue_req(fsvq, req, false);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOSPC) {
                        /*
                         * Virtqueue full. Retry submission from worker
                         * context as we might be holding fc->bg_lock.
                         */
                        spin_lock(&fsvq->lock);
                        list_add_tail(&req->list, &fsvq->queued_reqs);
                        /* Counted as in flight while parked on queued_reqs */
                        inc_in_flight_req(fsvq);
                        schedule_delayed_work(&fsvq->dispatch_work,
                                                msecs_to_jiffies(1));
                        spin_unlock(&fsvq->lock);
                        return;
                }
                req->out.h.error = ret;
                pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

                /* Can't end request in submission context. Use a worker */
                spin_lock(&fsvq->lock);
                list_add_tail(&req->list, &fsvq->end_reqs);
                schedule_delayed_work(&fsvq->dispatch_work, 0);
                spin_unlock(&fsvq->lock);
                return;
        }
}
1065
1066 const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
1067         .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
1068         .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
1069         .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
1070         .release                        = virtio_fs_fiq_release,
1071 };
1072
/*
 * Populate a freshly created fuse superblock for a virtio-fs instance.
 * Runs under virtio_fs_mutex so it cannot race with device ->remove.
 * Returns 0 on success or a negative errno.
 */
static int virtio_fs_fill_super(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *fs = fc->iq.priv;
        unsigned int i;
        int err;
        /* Fixed mount policy: virtio-fs accepts no mount options here */
        struct fuse_fs_context ctx = {
                .rootmode = S_IFDIR,
                .default_permissions = 1,
                .allow_other = 1,
                .max_read = UINT_MAX,
                .blksize = 512,
                .destroy = true,
                .no_control = true,
                .no_force_umount = true,
                .no_mount_options = true,
        };

        mutex_lock(&virtio_fs_mutex);

        /* After holding mutex, make sure virtiofs device is still there.
         * Though we are holding a reference to it, drive ->remove might
         * still have cleaned up virtual queues. In that case bail out.
         */
        err = -EINVAL;
        if (list_empty(&fs->list)) {
                pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
                goto err;
        }

        err = -ENOMEM;
        /* Allocate fuse_dev for hiprio and notification queues */
        for (i = 0; i < VQ_REQUEST; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                fsvq->fud = fuse_dev_alloc();
                if (!fsvq->fud)
                        goto err_free_fuse_devs;
        }

        /* fuse_fill_super_common() allocates the VQ_REQUEST fud via fudptr */
        ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
        err = fuse_fill_super_common(sb, &ctx);
        if (err < 0)
                goto err_free_fuse_devs;

        fc = fs->vqs[VQ_REQUEST].fud->fc;

        /* NOTE(review): the loop above only allocated fuds for queues below
         * VQ_REQUEST; verify nvqs cannot exceed VQ_REQUEST + 1 here, else
         * fuse_dev_install() below would see a NULL fud for extra queues.
         */
        for (i = 0; i < fs->nvqs; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                if (i == VQ_REQUEST)
                        continue; /* already initialized */
                fuse_dev_install(fsvq->fud, fc);
        }

        /* Previous unmount will stop all queues. Start these again */
        virtio_fs_start_all_queues(fs);
        fuse_send_init(fc);
        mutex_unlock(&virtio_fs_mutex);
        return 0;

err_free_fuse_devs:
        virtio_fs_free_devs(fs);
err:
        mutex_unlock(&virtio_fs_mutex);
        return err;
}
1140
/*
 * Tear down a virtio-fs superblock: quiesce the hiprio (forget) queue,
 * let fuse core send FUSE_DESTROY via fuse_kill_sb_anon(), then stop,
 * drain and free everything.
 */
static void virtio_kill_sb(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *vfs;
        struct virtio_fs_vq *fsvq;

        /* If mount failed, we can still be called without any fc */
        if (!fc)
                return fuse_kill_sb_anon(sb);

        vfs = fc->iq.priv;
        fsvq = &vfs->vqs[VQ_HIPRIO];

        /* Stop forget queue. Soon destroy will be sent */
        spin_lock(&fsvq->lock);
        fsvq->connected = false;
        spin_unlock(&fsvq->lock);
        /* Wait for already-submitted requests before sending destroy */
        virtio_fs_drain_all_queues(vfs);

        fuse_kill_sb_anon(sb);

        /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
         * and drain one more time and free fuse devices. Freeing fuse
         * devices will drop their reference on fuse_conn and that in
         * turn will drop its reference on virtio_fs object.
         */
        virtio_fs_stop_all_queues(vfs);
        virtio_fs_drain_all_queues(vfs);
        virtio_fs_free_devs(vfs);
}
1171
1172 static int virtio_fs_test_super(struct super_block *sb,
1173                                 struct fs_context *fsc)
1174 {
1175         struct fuse_conn *fc = fsc->s_fs_info;
1176
1177         return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
1178 }
1179
1180 static int virtio_fs_set_super(struct super_block *sb,
1181                                struct fs_context *fsc)
1182 {
1183         int err;
1184
1185         err = get_anon_bdev(&sb->s_dev);
1186         if (!err)
1187                 fuse_conn_get(fsc->s_fs_info);
1188
1189         return err;
1190 }
1191
/*
 * Mount entry point: look up the virtio_fs instance named by the mount
 * source tag, bind a fuse_conn to it and get (or create) the superblock.
 */
static int virtio_fs_get_tree(struct fs_context *fsc)
{
        struct virtio_fs *fs;
        struct super_block *sb;
        struct fuse_conn *fc;
        int err;

        /* This gets a reference on virtio_fs object. This ptr gets installed
         * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
         * to drop the reference to this object.
         */
        fs = virtio_fs_find_instance(fsc->source);
        if (!fs) {
                pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
                return -EINVAL;
        }

        fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
        if (!fc) {
                /* virtio_fs_put() must be called under virtio_fs_mutex */
                mutex_lock(&virtio_fs_mutex);
                virtio_fs_put(fs);
                mutex_unlock(&virtio_fs_mutex);
                return -ENOMEM;
        }

        fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
                       fs);
        fc->release = fuse_free_conn;
        fc->delete_stale = true;

        fsc->s_fs_info = fc;
        sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
        /* Drop the initial ref; for a new sb, virtio_fs_set_super() took
         * its own reference on the superblock's behalf.
         */
        fuse_conn_put(fc);
        if (IS_ERR(sb))
                return PTR_ERR(sb);

        if (!sb->s_root) {
                /* Newly created superblock: finish setup before activating */
                err = virtio_fs_fill_super(sb);
                if (err) {
                        deactivate_locked_super(sb);
                        return err;
                }

                sb->s_flags |= SB_ACTIVE;
        }

        WARN_ON(fsc->root);
        fsc->root = dget(sb->s_root);
        return 0;
}
1242
/* fs_context operations: all mount work happens in ->get_tree */
static const struct fs_context_operations virtio_fs_context_ops = {
        .get_tree       = virtio_fs_get_tree,
};
1246
/* Set up a new fs_context for mounting a "virtiofs" filesystem */
static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
        fsc->ops = &virtio_fs_context_ops;
        return 0;
}
1252
/* "virtiofs" filesystem type, registered via the new mount API */
static struct file_system_type virtio_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "virtiofs",
        .init_fs_context = virtio_fs_init_fs_context,
        .kill_sb        = virtio_kill_sb,
};
1259
1260 static int __init virtio_fs_init(void)
1261 {
1262         int ret;
1263
1264         ret = register_virtio_driver(&virtio_fs_driver);
1265         if (ret < 0)
1266                 return ret;
1267
1268         ret = register_filesystem(&virtio_fs_type);
1269         if (ret < 0) {
1270                 unregister_virtio_driver(&virtio_fs_driver);
1271                 return ret;
1272         }
1273
1274         return 0;
1275 }
1276 module_init(virtio_fs_init);
1277
/* Module exit: tear down in reverse order of virtio_fs_init() */
static void __exit virtio_fs_exit(void)
{
        unregister_filesystem(&virtio_fs_type);
        unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);
1284
MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
/* Autoload on mount of the fs type matching the module name */
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);