GNU Linux-libre 6.1.86-gnu
[releases.git] / fs / fuse / inode.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/pagemap.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/seq_file.h>
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/fs_context.h>
19 #include <linux/fs_parser.h>
20 #include <linux/statfs.h>
21 #include <linux/random.h>
22 #include <linux/sched.h>
23 #include <linux/exportfs.h>
24 #include <linux/posix_acl.h>
25 #include <linux/pid_namespace.h>
26 #include <uapi/linux/magic.h>
27
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

/* Slab cache for struct fuse_inode objects */
static struct kmem_cache *fuse_inode_cachep;
/* All live fuse connections; NOTE(review): presumably guarded by fuse_mutex — confirm at users */
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);

/* Validates/applies writes to the two tunables below */
static int set_global_limit(const char *val, const struct kernel_param *kp);

/* Global cap on max_background settable by unprivileged users (0644 sysfs) */
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
                  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

/* Global cap on the congestion threshold settable by unprivileged users */
unsigned max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
                  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

/* Block size used by fuseblk mounts that do not pass blksize= */
#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif
65
66 struct fuse_forget_link *fuse_alloc_forget(void)
67 {
68         return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
69 }
70
71 static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
72 {
73         struct fuse_submount_lookup *sl;
74
75         sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
76         if (!sl)
77                 return NULL;
78         sl->forget = fuse_alloc_forget();
79         if (!sl->forget)
80                 goto out_free;
81
82         return sl;
83
84 out_free:
85         kfree(sl);
86         return NULL;
87 }
88
/*
 * Super operation: allocate and initialize a fuse inode.
 * Returns the embedded VFS inode, or NULL on allocation failure.
 */
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

	fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
		return NULL;

	/* Reset per-inode state; the slab object may be recycled */
	fi->i_time = 0;
	fi->inval_mask = 0;
	fi->nodeid = 0;
	fi->nlookup = 0;
	fi->attr_version = 0;
	fi->orig_ino = 0;
	fi->state = 0;
	fi->submount_lookup = NULL;
	mutex_init(&fi->mutex);
	spin_lock_init(&fi->lock);
	/* Preallocated here, consumed by fuse_queue_forget() at eviction */
	fi->forget = fuse_alloc_forget();
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;

	return &fi->inode;

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
}
122
/* Super operation: release everything fuse_alloc_inode() set up. */
static void fuse_free_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	mutex_destroy(&fi->mutex);
	/* May be NULL if it was handed off to fuse_queue_forget() */
	kfree(fi->forget);
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
	kmem_cache_free(fuse_inode_cachep, fi);
}
134
135 static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
136                                          struct fuse_submount_lookup *sl)
137 {
138         if (!refcount_dec_and_test(&sl->count))
139                 return;
140
141         fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
142         sl->forget = NULL;
143         kfree(sl);
144 }
145
/*
 * Super operation: final teardown of an inode being removed from the
 * cache.  Informs the server (forget) only while the superblock is
 * still active; during unmount the connection teardown handles it.
 */
static void fuse_evict_inode(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	/* Will write inode on close/munmap and in all other dirtiers */
	WARN_ON(inode->i_state & I_DIRTY_INODE);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	if (inode->i_sb->s_flags & SB_ACTIVE) {
		struct fuse_conn *fc = get_fuse_conn(inode);

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
		/* Return all accumulated lookup counts to the server */
		if (fi->nlookup) {
			fuse_queue_forget(fc, fi->forget, fi->nodeid,
					  fi->nlookup);
			fi->forget = NULL;
		}

		if (fi->submount_lookup) {
			fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
			fi->submount_lookup = NULL;
		}
	}
	/* A healthy regular file must have no pending writeback state here */
	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
}
176
177 static int fuse_reconfigure(struct fs_context *fsc)
178 {
179         struct super_block *sb = fsc->root->d_sb;
180
181         sync_filesystem(sb);
182         if (fsc->sb_flags & SB_MANDLOCK)
183                 return -EINVAL;
184
185         return 0;
186 }
187
188 /*
189  * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
190  * so that it will fit.
191  */
192 static ino_t fuse_squash_ino(u64 ino64)
193 {
194         ino_t ino = (ino_t) ino64;
195         if (sizeof(ino_t) < sizeof(u64))
196                 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
197         return ino;
198 }
199
/*
 * Apply server-supplied attributes to the inode.
 *
 * Caller must hold fi->lock.  @cache_mask names the timestamp fields
 * (STATX_MTIME/STATX_CTIME) whose locally cached values take precedence;
 * those are skipped here.  @attr_valid is the cache lifetime granted by
 * the server and is stored in fi->i_time.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid, u32 cache_mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	/* Bump version so racing stale replies can be detected and dropped */
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;
	WRITE_ONCE(fi->inval_mask, 0);

	inode->i_ino     = fuse_squash_ino(attr->ino);
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks  = attr->blocks;

	/* Sanitize nsecs */
	attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
	attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
	attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);

	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;
	/* mtime from server may be stale due to local buffered write */
	if (!(cache_mask & STATX_MTIME)) {
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
	}
	if (!(cache_mask & STATX_CTIME)) {
		inode->i_ctime.tv_sec   = attr->ctime;
		inode->i_ctime.tv_nsec  = attr->ctimensec;
	}

	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;

	/*
	 * We are refreshing inode data and it is possible that another
	 * client set suid/sgid or security.capability xattr. So clear
	 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
	 * was set or if security.capability xattr was set. But we don't
	 * know if security.capability has been set or not. So clear it
	 * anyway. Its less efficient but should be safe.
	 */
	inode->i_flags &= ~S_NOSEC;
}
262
263 u32 fuse_get_cache_mask(struct inode *inode)
264 {
265         struct fuse_conn *fc = get_fuse_conn(inode);
266
267         if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
268                 return 0;
269
270         return STATX_MTIME | STATX_CTIME | STATX_SIZE;
271 }
272
/*
 * Refresh inode attributes from a server reply, unless the reply is
 * stale (older than the current attr_version) or a size change is in
 * flight (FUSE_I_SIZE_UNSTABLE).  Also invalidates the page cache when
 * the size or mtime changed under us.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	u32 cache_mask;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	/*
	 * In case of writeback_cache enabled, writes update mtime, ctime and
	 * may update i_size.  In these cases trust the cached value in the
	 * inode.
	 */
	cache_mask = fuse_get_cache_mask(inode);
	if (cache_mask & STATX_SIZE)
		attr->size = i_size_read(inode);

	if (cache_mask & STATX_MTIME) {
		attr->mtime = inode->i_mtime.tv_sec;
		attr->mtimensec = inode->i_mtime.tv_nsec;
	}
	if (cache_mask & STATX_CTIME) {
		attr->ctime = inode->i_ctime.tv_sec;
		attr->ctimensec = inode->i_ctime.tv_nsec;
	}

	/* Drop replies that raced with a newer local attribute update */
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!(cache_mask & STATX_SIZE))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	/* Page-cache invalidation must happen outside fi->lock */
	if (!cache_mask && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}

	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_dontcache(inode, attr->flags);
}
348
349 static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
350                                       u64 nodeid)
351 {
352         sl->nodeid = nodeid;
353         refcount_set(&sl->count, 1);
354 }
355
/*
 * One-time initialization of a freshly created inode from server
 * attributes: file type bits, size, timestamps, and the per-type
 * operations.  BUG()s on an unrecognized file type.
 */
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode, attr->flags);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		/* Device numbers come over the wire in "new" encoding */
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
}
379
380 static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
381 {
382         u64 nodeid = *(u64 *) _nodeidp;
383         if (get_node_id(inode) == nodeid)
384                 return 1;
385         else
386                 return 0;
387 }
388
389 static int fuse_inode_set(struct inode *inode, void *_nodeidp)
390 {
391         u64 nodeid = *(u64 *) _nodeidp;
392         get_fuse_inode(inode)->nodeid = nodeid;
393         return 0;
394 }
395
/*
 * Look up or create the inode for @nodeid and refresh its attributes.
 *
 * Each successful call accounts one lookup (fi->nlookup, or the shared
 * submount_lookup for submount roots), returned to the server via
 * fuse_queue_forget() at eviction.  Returns a referenced inode or NULL
 * on allocation failure.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		struct fuse_inode *fi;

		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr);
		fi = get_fuse_inode(inode);
		fi->nodeid = nodeid;
		fi->submount_lookup = fuse_alloc_submount_lookup();
		if (!fi->submount_lookup) {
			iput(inode);
			return NULL;
		}
		/* Sets nlookup = 1 on fi->submount_lookup->nlookup */
		fuse_init_submount_lookup(fi->submount_lookup, nodeid);
		inode->i_flags |= S_AUTOMOUNT;
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr);
		unlock_new_inode(inode);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		/* The root inode must never be unhashed/dropped */
		if (inode != d_inode(sb->s_root)) {
			remove_inode_hash(inode);
			iput(inode);
			goto retry;
		}
	}
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
done:
	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}
463
/*
 * Find an inode by nodeid across all mounts of this connection.
 * Returns a referenced inode (caller must iput()) or NULL.  If @fm is
 * non-NULL it is set to the mount the inode was found on.
 * Caller must hold fc->killsb to keep the mount list stable.
 */
struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *fm_iter;
	struct inode *inode;

	WARN_ON(!rwsem_is_locked(&fc->killsb));
	list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
		/* Skip mounts whose superblock is not (yet) set up */
		if (!fm_iter->sb)
			continue;

		inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
		if (inode) {
			if (fm)
				*fm = fm_iter;
			return inode;
		}
	}

	return NULL;
}
485
/*
 * Server-initiated inode invalidation: mark cached attributes and ACLs
 * stale, and for offset >= 0 drop cached pages in [offset, offset+len)
 * (to EOF when len <= 0).  Returns -ENOENT if the nodeid is not cached.
 */
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		return -ENOENT;

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	/* Bump attr_version so in-flight attribute replies are ignored */
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;	/* invalidate through end of file */
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}
517
518 bool fuse_lock_inode(struct inode *inode)
519 {
520         bool locked = false;
521
522         if (!get_fuse_conn(inode)->parallel_dirops) {
523                 mutex_lock(&get_fuse_inode(inode)->mutex);
524                 locked = true;
525         }
526
527         return locked;
528 }
529
/* Counterpart of fuse_lock_inode(); @locked is its return value. */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}
535
/*
 * Super operation invoked on MNT_FORCE umount: abort the connection so
 * outstanding requests fail, unless the server disabled forced umount.
 */
static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->no_force_umount)
		return;

	fuse_abort_conn(fc);

	// Only retire block-device-based superblocks.
	if (sb->s_bdev != NULL)
		retire_super(sb);
}
549
550 static void fuse_send_destroy(struct fuse_mount *fm)
551 {
552         if (fm->fc->conn_init) {
553                 FUSE_ARGS(args);
554
555                 args.opcode = FUSE_DESTROY;
556                 args.force = true;
557                 args.nocreds = true;
558                 fuse_simple_request(fm, &args);
559         }
560 }
561
/* Translate the wire-format fuse_kstatfs into the VFS kstatfs. */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}
575
/*
 * Super operation: statfs.  Processes not allowed to access the mount
 * get a dummy (all-zero except f_type) result rather than an error;
 * otherwise the request is forwarded to the server as FUSE_STATFS.
 */
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_statfs_out outarg;
	int err;

	if (!fuse_allow_current_process(fm->fc)) {
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

	memset(&outarg, 0, sizeof(outarg));
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
	err = fuse_simple_request(fm, &args);
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}
601
602 static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
603 {
604         struct fuse_sync_bucket *bucket;
605
606         bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
607         if (bucket) {
608                 init_waitqueue_head(&bucket->waitq);
609                 /* Initial active count */
610                 atomic_set(&bucket->count, 1);
611         }
612         return bucket;
613 }
614
/*
 * Wait for all writes accounted in the current sync bucket to complete.
 * Installs a fresh bucket for new writes, then sleeps until the old
 * bucket's count drains to zero.  Fast path: no outstanding writes.
 */
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	/* fc->lock is held, so plain dereference of the RCU pointer is safe */
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of new bucket depends on completion of this bucket, so add
	 * one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	kfree_rcu(bucket, rcu);
}
652
/*
 * Super operation: syncfs.  Waits for tracked writes, then asks the
 * server to sync via FUSE_SYNCFS.  -ENOSYS from the server disables
 * the opcode for the rest of the connection's lifetime.
 */
static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int err;

	/*
	 * Userspace cannot handle the wait == 0 case.  Avoid a
	 * gratuitous roundtrip.
	 */
	if (!wait)
		return 0;

	/* The filesystem is being unmounted.  Nothing to do. */
	if (!sb->s_root)
		return 0;

	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.out_numargs = 0;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		/* Old server: remember it doesn't support syncfs */
		fc->sync_fs = 0;
		err = 0;
	}

	return err;
}
693
/* Tokens for the mount options parsed by fuse_parse_param() */
enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};
707
/* fs_context parameter table: option name, type, and token mapping */
static const struct fs_parameter_spec fuse_fs_parameters[] = {
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
	fsparam_string	("subtype",		OPT_SUBTYPE),
	{}
};
721
/*
 * fs_context parse callback: validate a single mount option and record
 * it in the fuse_fs_context.  Reconfigure (remount) rejects all option
 * changes, except that legacy mount(MS_REMOUNT) options are silently
 * ignored for backward compatibility.
 */
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fsc->fs_private;
	int opt;

	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fsc->oldapi)
			return 0;

		return invalfc(fsc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		/* Transfer string ownership from param to fsc */
		fsc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
			return invalfc(fsc, "Multiple subtypes specified");
		/* Transfer string ownership from param to ctx */
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
		ctx->fd_present = true;
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
			return invalfc(fsc, "Invalid rootmode");
		ctx->rootmode = result.uint_32;
		ctx->rootmode_present = true;
		break;

	case OPT_USER_ID:
		/* Map the numeric id into the mounting user namespace */
		ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
		if (!uid_valid(ctx->user_id))
			return invalfc(fsc, "Invalid user_id");
		ctx->user_id_present = true;
		break;

	case OPT_GROUP_ID:
		ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
		if (!gid_valid(ctx->group_id))
			return invalfc(fsc, "Invalid group_id");
		ctx->group_id_present = true;
		break;

	case OPT_DEFAULT_PERMISSIONS:
		ctx->default_permissions = true;
		break;

	case OPT_ALLOW_OTHER:
		ctx->allow_other = true;
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
			return invalfc(fsc, "blksize only supported for fuseblk");
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
808
809 static void fuse_free_fsc(struct fs_context *fsc)
810 {
811         struct fuse_fs_context *ctx = fsc->fs_private;
812
813         if (ctx) {
814                 kfree(ctx->subtype);
815                 kfree(ctx);
816         }
817 }
818
/*
 * Super operation: emit mount options for /proc/mounts.  Legacy-style
 * options are only shown for mounts created through the old interface;
 * DAX mode is always reported when configured.
 */
static int fuse_show_options(struct seq_file *m, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		/* ~0 means "unlimited": not worth showing */
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
#ifdef CONFIG_FUSE_DAX
	if (fc->dax_mode == FUSE_DAX_ALWAYS)
		seq_puts(m, ",dax=always");
	else if (fc->dax_mode == FUSE_DAX_NEVER)
		seq_puts(m, ",dax=never");
	else if (fc->dax_mode == FUSE_DAX_INODE_USER)
		seq_puts(m, ",dax=inode");
#endif

	return 0;
}
849
/*
 * Initialize an input queue (requests travelling kernel -> server),
 * with the given transport ops and their private data.
 */
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
	spin_lock_init(&fiq->lock);
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	/* Empty forget list: tail points at the head's next slot */
	fiq->forget_list_tail = &fiq->forget_list_head;
	fiq->connected = 1;
	fiq->ops = ops;
	fiq->priv = priv;
}
864
865 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
866 {
867         unsigned int i;
868
869         spin_lock_init(&fpq->lock);
870         for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
871                 INIT_LIST_HEAD(&fpq->processing[i]);
872         INIT_LIST_HEAD(&fpq->io);
873         fpq->connected = 1;
874 }
875
/*
 * Initialize a connection object and attach @fm as its first mount.
 * The connection starts with one reference (dropped by fuse_conn_put())
 * and one device count.
 */
void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
{
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	spin_lock_init(&fc->bg_lock);
	init_rwsem(&fc->killsb);
	refcount_set(&fc->count, 1);
	atomic_set(&fc->dev_count, 1);
	init_waitqueue_head(&fc->blocked_waitq);
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
	INIT_LIST_HEAD(&fc->devices);
	atomic_set(&fc->num_waiting, 0);
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
	atomic64_set(&fc->khctr, 0);
	fc->polled_files = RB_ROOT;
	fc->blocked = 0;
	fc->initialized = 0;
	fc->connected = 1;
	/* Start at 1 so an attr_version of 0 can mean "no version" */
	atomic64_set(&fc->attr_version, 1);
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
	/* Pin the namespaces the connection was created in */
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
	fc->user_ns = get_user_ns(user_ns);
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
911
/**
 * fuse_conn_put - drop a reference to a fuse connection
 * @fc: the connection
 *
 * On the final put, tears the connection down: frees DAX state,
 * releases the input queue via its transport ops, drops the pid and
 * user namespace references, frees the current sync bucket and finally
 * invokes @fc->release() to free the containing object.
 */
void fuse_conn_put(struct fuse_conn *fc)
{
	if (refcount_dec_and_test(&fc->count)) {
		struct fuse_iqueue *fiq = &fc->iq;
		struct fuse_sync_bucket *bucket;

		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
		if (fiq->ops->release)
			fiq->ops->release(fiq);
		put_pid_ns(fc->pid_ns);
		put_user_ns(fc->user_ns);
		/* last reference is gone, so a plain dereference is safe */
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
		fc->release(fc);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
933
/**
 * fuse_conn_get - take a reference to a fuse connection
 * @fc: the connection
 *
 * Returns @fc for call-chaining convenience.
 */
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
	refcount_inc(&fc->count);
	return fc;
}
EXPORT_SYMBOL_GPL(fuse_conn_get);
940
941 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
942 {
943         struct fuse_attr attr;
944         memset(&attr, 0, sizeof(attr));
945
946         attr.mode = mode;
947         attr.ino = FUSE_ROOT_ID;
948         attr.nlink = 1;
949         return fuse_iget(sb, 1, 0, &attr, 0, 0);
950 }
951
/* Decoded NFS-style file handle: identifies one fuse inode instance. */
struct fuse_inode_handle {
	u64 nodeid;	/* fuse nodeid of the inode */
	u32 generation;	/* inode generation, guards against nodeid reuse */
};
956
/*
 * Turn a decoded file handle into a dentry.
 *
 * First try to find the inode in the inode cache; failing that, ask the
 * server to look up "." relative to the handle's nodeid (only possible
 * when the server advertised export support).  Returns an ERR_PTR on
 * failure: -ESTALE for a missing/recycled inode, -EIO if the server
 * returned a different nodeid than requested.
 */
static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
	if (!inode) {
		struct fuse_entry_out outarg;
		const struct qstr name = QSTR_INIT(".", 1);

		if (!fc->export_support)
			goto out_err;

		/* lookup of "." resolves the nodeid itself on the server */
		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

	entry = d_obtain_alias(inode);
	/* non-root entries must be revalidated against the server */
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}
1003
/*
 * Encode an NFS file handle for @inode (and optionally @parent).
 *
 * Layout: fh[0..1] = nodeid (high, low 32 bits), fh[2] = generation;
 * when a parent is included the same triple follows at fh[3..5].
 * Returns the fh type: 0x81 (inode only) or 0x82 (inode + parent), or
 * FILEID_INVALID with *max_len updated if the caller's buffer is too
 * small.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return  FILEID_INVALID;
	}

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? 0x82 : 0x81;
}
1035
1036 static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
1037                 struct fid *fid, int fh_len, int fh_type)
1038 {
1039         struct fuse_inode_handle handle;
1040
1041         if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
1042                 return NULL;
1043
1044         handle.nodeid = (u64) fid->raw[0] << 32;
1045         handle.nodeid |= (u64) fid->raw[1];
1046         handle.generation = fid->raw[2];
1047         return fuse_get_dentry(sb, &handle);
1048 }
1049
1050 static struct dentry *fuse_fh_to_parent(struct super_block *sb,
1051                 struct fid *fid, int fh_len, int fh_type)
1052 {
1053         struct fuse_inode_handle parent;
1054
1055         if (fh_type != 0x82 || fh_len < 6)
1056                 return NULL;
1057
1058         parent.nodeid = (u64) fid->raw[3] << 32;
1059         parent.nodeid |= (u64) fid->raw[4];
1060         parent.generation = fid->raw[5];
1061         return fuse_get_dentry(sb, &parent);
1062 }
1063
/*
 * Find the parent of @child by asking the server to look up ".."
 * relative to the child's nodeid.  Requires export support from the
 * server; returns -ESTALE otherwise or when the entry vanished.
 */
static struct dentry *fuse_get_parent(struct dentry *child)
{
	struct inode *child_inode = d_inode(child);
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &dotdot_name, &outarg, &inode);
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
		return ERR_PTR(err);
	}

	parent = d_obtain_alias(inode);
	/* non-root entries must be revalidated against the server */
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
		fuse_invalidate_entry_cache(parent);

	return parent;
}
1090
/* NFS export hooks; usable only when the server advertises export support. */
static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
	.get_parent	= fuse_get_parent,
};
1097
/* Superblock operations shared by all fuse mounts. */
static const struct super_operations fuse_super_operations = {
	.alloc_inode	= fuse_alloc_inode,
	.free_inode	= fuse_free_inode,
	.evict_inode	= fuse_evict_inode,
	.write_inode	= fuse_write_inode,
	/* drop inodes as soon as the last reference goes away */
	.drop_inode	= generic_delete_inode,
	.umount_begin	= fuse_umount_begin,
	.statfs		= fuse_statfs,
	.sync_fs	= fuse_sync_fs,
	.show_options	= fuse_show_options,
};
1109
1110 static void sanitize_global_limit(unsigned *limit)
1111 {
1112         /*
1113          * The default maximum number of async requests is calculated to consume
1114          * 1/2^13 of the total memory, assuming 392 bytes per request.
1115          */
1116         if (*limit == 0)
1117                 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
1118
1119         if (*limit >= 1 << 16)
1120                 *limit = (1 << 16) - 1;
1121 }
1122
1123 static int set_global_limit(const char *val, const struct kernel_param *kp)
1124 {
1125         int rv;
1126
1127         rv = param_set_uint(val, kp);
1128         if (rv)
1129                 return rv;
1130
1131         sanitize_global_limit((unsigned *)kp->arg);
1132
1133         return 0;
1134 }
1135
/*
 * Apply the background-request limits the server proposed in its INIT
 * reply (protocol minor >= 13 only).  Unprivileged servers are capped
 * by the global max_user_bgreq/max_user_congthresh module parameters;
 * CAP_SYS_ADMIN bypasses the caps.  bg_lock protects the fc fields.
 */
static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	/* make sure the global caps themselves are within range first */
	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	spin_lock(&fc->bg_lock);
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
	spin_unlock(&fc->bg_lock);
}
1162
/* Argument bundle for the FUSE_INIT request; freed in process_init_reply(). */
struct fuse_init_args {
	struct fuse_args args;		/* generic request descriptor */
	struct fuse_init_in in;		/* what we offer the server */
	struct fuse_init_out out;	/* what the server accepted */
};
1168
/*
 * Completion callback for the FUSE_INIT request.
 *
 * Decodes the feature flags the server accepted and latches them into
 * the connection.  With FUSE_INIT_EXT the flags are 64 bits, split
 * across arg->flags and arg->flags2.  On any failure (transport error,
 * major version mismatch, bad DAX alignment) the connection is marked
 * errored instead of initialized.  Always wakes waiters blocked on the
 * connection becoming initialized.
 */
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
			       int error)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
	bool ok = true;

	if (error || arg->major != FUSE_KERNEL_VERSION)
		ok = false;
	else {
		unsigned long ra_pages;

		process_init_limits(fc, arg);

		if (arg->minor >= 6) {
			/* assemble the 64-bit flag word */
			u64 flags = arg->flags;

			if (flags & FUSE_INIT_EXT)
				flags |= (u64) arg->flags2 << 32;

			ra_pages = arg->max_readahead / PAGE_SIZE;
			if (flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
			if (!(flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
			/* FUSE_FLOCK_LOCKS only exists from minor 17 on */
			if (arg->minor >= 17) {
				if (!(flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
			} else {
				if (!(flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
			}
			if (flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
			if (flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
			if (flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
			if (flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
			else if (flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
			if (flags & FUSE_DO_READDIRPLUS) {
				fc->do_readdirplus = 1;
				if (flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
			if (flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
			if (flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
			if (flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
			if (flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
			/* server-chosen timestamp granularity, max 1s */
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fm->sb->s_time_gran = arg->time_gran;
			if ((flags & FUSE_POSIX_ACL)) {
				fc->default_permissions = 1;
				fc->posix_acl = 1;
				fm->sb->s_xattr = fuse_acl_xattr_handlers;
			}
			if (flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
			if (flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
			if (flags & FUSE_MAX_PAGES) {
				/* clamp server value to [1, max_pages_limit] */
				fc->max_pages =
					min_t(unsigned int, fc->max_pages_limit,
					max_t(unsigned int, arg->max_pages, 1));
			}
			if (IS_ENABLED(CONFIG_FUSE_DAX)) {
				if (flags & FUSE_MAP_ALIGNMENT &&
				    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
					ok = false;
				}
				if (flags & FUSE_HAS_INODE_DAX)
					fc->inode_dax = 1;
			}
			if (flags & FUSE_HANDLE_KILLPRIV_V2) {
				fc->handle_killpriv_v2 = 1;
				fm->sb->s_flags |= SB_NOSEC;
			}
			if (flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
			if (flags & FUSE_SECURITY_CTX)
				fc->init_security = 1;
		} else {
			/* pre-7.6 servers: conservative legacy defaults */
			ra_pages = fc->max_read / PAGE_SIZE;
			fc->no_lock = 1;
			fc->no_flock = 1;
		}

		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
		fc->conn_init = 1;
	}
	kfree(ia);

	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

	/* unblock anyone waiting for the connection to become usable */
	fuse_set_initialized(fc);
	wake_up_all(&fc->blocked_waitq);
}
1285
/**
 * fuse_send_init - send the FUSE_INIT handshake request
 * @fm: mount to initialize
 *
 * Offers the full set of kernel-supported feature flags to the server
 * and queues the request in the background; process_init_reply() picks
 * up the answer.  The allocation uses __GFP_NOFAIL, so this cannot
 * fail; if queuing fails the reply callback is invoked directly with
 * -ENOTCONN.
 */
void fuse_send_init(struct fuse_mount *fm)
{
	struct fuse_init_args *ia;
	u64 flags;

	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
	/* everything the kernel can do; the server picks its subset */
	flags =
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
		FUSE_SECURITY_CTX;
#ifdef CONFIG_FUSE_DAX
	if (fm->fc->dax)
		flags |= FUSE_MAP_ALIGNMENT;
	if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
		flags |= FUSE_HAS_INODE_DAX;
#endif
	if (fm->fc->auto_submounts)
		flags |= FUSE_SUBMOUNTS;

	/* 64-bit flag word split across the two 32-bit wire fields */
	ia->in.flags = flags;
	ia->in.flags2 = flags >> 32;

	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
	/* Variable length argument used for backward compatibility
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
	ia->args.out_argvar = true;
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
}
EXPORT_SYMBOL_GPL(fuse_send_init);
1339
/**
 * fuse_free_conn - default fc->release() implementation
 * @fc: connection to free
 *
 * Frees after an RCU grace period so lockless readers of the devices
 * list cannot see freed memory; the list itself must already be empty.
 */
void fuse_free_conn(struct fuse_conn *fc)
{
	WARN_ON(!list_empty(&fc->devices));
	kfree_rcu(fc, rcu);
}
EXPORT_SYMBOL_GPL(fuse_free_conn);
1346
/*
 * Set up a private backing_dev_info for this fuse superblock, named
 * after the connection's device number ("-fuseblk" suffix for block
 * device backed mounts).  Returns 0 or a negative errno.
 */
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
	char *suffix = "";

	if (sb->s_bdev) {
		suffix = "-fuseblk";
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
	if (err)
		return err;

	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
	bdi_set_max_ratio(sb->s_bdi, 1);

	return 0;
}
1386
1387 struct fuse_dev *fuse_dev_alloc(void)
1388 {
1389         struct fuse_dev *fud;
1390         struct list_head *pq;
1391
1392         fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1393         if (!fud)
1394                 return NULL;
1395
1396         pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
1397         if (!pq) {
1398                 kfree(fud);
1399                 return NULL;
1400         }
1401
1402         fud->pq.processing = pq;
1403         fuse_pqueue_init(&fud->pq);
1404
1405         return fud;
1406 }
1407 EXPORT_SYMBOL_GPL(fuse_dev_alloc);
1408
/**
 * fuse_dev_install - attach a fuse device to a connection
 * @fud: device to attach (takes its own reference on @fc)
 * @fc:  connection to attach to
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
1417
/*
 * Allocate a fuse device and attach it to @fc in one step.
 * Returns NULL on allocation failure.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1430
/**
 * fuse_dev_free - detach and free a fuse device
 * @fud: device to free
 *
 * Unlinks the device from its connection (if installed), drops the
 * connection reference taken by fuse_dev_install(), then frees the
 * processing-queue hash table and the device itself.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *fc = fud->fc;

	if (fc) {
		spin_lock(&fc->lock);
		list_del(&fud->entry);
		spin_unlock(&fc->lock);

		fuse_conn_put(fc);
	}
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);
1446
/*
 * Build wire-format attributes from a cached fuse inode.  Used when a
 * submount duplicates an existing inode without doing a fresh lookup.
 */
static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
				      const struct fuse_inode *fi)
{
	*attr = (struct fuse_attr){
		.ino		= fi->inode.i_ino,
		.size		= fi->inode.i_size,
		.blocks		= fi->inode.i_blocks,
		.atime		= fi->inode.i_atime.tv_sec,
		.mtime		= fi->inode.i_mtime.tv_sec,
		.ctime		= fi->inode.i_ctime.tv_sec,
		.atimensec	= fi->inode.i_atime.tv_nsec,
		.mtimensec	= fi->inode.i_mtime.tv_nsec,
		.ctimensec	= fi->inode.i_ctime.tv_nsec,
		.mode		= fi->inode.i_mode,
		.nlink		= fi->inode.i_nlink,
		.uid		= fi->inode.i_uid.val,
		.gid		= fi->inode.i_gid.val,
		.rdev		= fi->inode.i_rdev,
		.blksize	= 1u << fi->inode.i_blkbits,
	};
}
1468
/* Common superblock defaults shared by regular mounts and submounts. */
static void fuse_sb_defaults(struct super_block *sb)
{
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
	sb->s_xattr = fuse_xattr_handlers;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;
	sb->s_export_op = &fuse_export_operations;
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
	/* NOSEC/I_VERSION may be set later based on server capabilities */
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;
}
1489
/*
 * Fill in a submount superblock whose root duplicates @parent_fi (the
 * crossed mount point on the parent superblock).  Inherits bdi, xattr
 * handlers, time granularity, block size and subtype from the parent.
 * Returns 0 or a negative errno.
 */
static int fuse_fill_super_submount(struct super_block *sb,
				    struct fuse_inode *parent_fi)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct super_block *parent_sb = parent_fi->inode.i_sb;
	struct fuse_attr root_attr;
	struct inode *root;
	struct fuse_submount_lookup *sl;
	struct fuse_inode *fi;

	fuse_sb_defaults(sb);
	fm->sb = sb;

	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi_get(parent_sb->s_bdi);

	sb->s_xattr = parent_sb->s_xattr;
	sb->s_time_gran = parent_sb->s_time_gran;
	sb->s_blocksize = parent_sb->s_blocksize;
	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
	if (parent_sb->s_subtype && !sb->s_subtype)
		return -ENOMEM;

	fuse_fill_attr_from_inode(&root_attr, parent_fi);
	/*
	 * NOTE(review): the fuse_iget() result is used unchecked below;
	 * a NULL return would oops on fi->nlookup-- — confirm fuse_iget()
	 * cannot fail for this duplicate of an already-cached inode.
	 */
	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
	/*
	 * This inode is just a duplicate, so it is not looked up and
	 * its nlookup should not be incremented.  fuse_iget() does
	 * that, though, so undo it here.
	 */
	fi = get_fuse_inode(root);
	fi->nlookup--;

	sb->s_d_op = &fuse_dentry_operations;
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;

	/*
	 * Grab the parent's submount_lookup pointer and take a
	 * reference on the shared nlookup from the parent.  This is to
	 * prevent the last forget for this nodeid from getting
	 * triggered until all users have finished with it.
	 */
	sl = parent_fi->submount_lookup;
	WARN_ON(!sl);
	if (sl) {
		refcount_inc(&sl->count);
		fi->submount_lookup = sl;
	}

	return 0;
}
1544
/* Filesystem context private data holds the FUSE inode of the mount point */
static int fuse_get_tree_submount(struct fs_context *fsc)
{
	struct fuse_mount *fm;
	struct fuse_inode *mp_fi = fsc->fs_private;
	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
	struct super_block *sb;
	int err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		return -ENOMEM;

	fm->fc = fuse_conn_get(fc);
	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	/*
	 * sget_fc() takes ownership of s_fs_info on success (it clears
	 * the pointer); if it is still set, the mount was not consumed
	 * and must be destroyed here, on both success and error.
	 */
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (IS_ERR(sb))
		return PTR_ERR(sb);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err) {
		deactivate_locked_super(sb);
		return err;
	}

	/* killsb guards the connection's mount list against teardown */
	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);

	return 0;
}
1582
/* Submounts need no option parsing; only the tree hook is provided. */
static const struct fs_context_operations fuse_context_submount_ops = {
	.get_tree	= fuse_get_tree_submount,
};
1586
/**
 * fuse_init_fs_context_submount - set up an fs_context for a submount
 * @fsc: filesystem context to initialize
 *
 * Only installs the submount operations; fsc->fs_private is expected
 * to already carry the mount-point fuse inode.
 */
int fuse_init_fs_context_submount(struct fs_context *fsc)
{
	fsc->ops = &fuse_context_submount_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
1593
/**
 * fuse_fill_super_common - fill a fuse/fuseblk/virtiofs superblock
 * @sb:  superblock to fill
 * @ctx: parsed mount parameters
 *
 * Shared by the regular fuse mount path and virtiofs.  Sets the sb
 * defaults, optionally allocates DAX state and a fuse device (only
 * when @ctx->fudptr is set, i.e. a /dev/fuse based mount), copies the
 * mount options into the connection, creates the root dentry and
 * finally registers the connection under fuse_mutex.  On error every
 * acquired resource is unwound in reverse order.  Returns 0 or a
 * negative errno.
 */
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
{
	struct fuse_dev *fud = NULL;
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct inode *root;
	struct dentry *root_dentry;
	int err;

	err = -EINVAL;
	if (sb->s_flags & SB_MANDLOCK)
		goto err;

	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
	fuse_sb_defaults(sb);

	if (ctx->is_bdev) {
#ifdef CONFIG_BLOCK
		err = -EINVAL;
		if (!sb_set_blocksize(sb, ctx->blksize))
			goto err;
#endif
	} else {
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
	}

	/* take ownership of the subtype string from the context */
	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
		if (err)
			goto err;
	}

	/* fudptr is only set for /dev/fuse based mounts, not virtiofs */
	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
			goto err_free_dax;
	}

	fc->dev = sb->s_dev;
	fm->sb = sb;
	err = fuse_bdi_init(fc, sb);
	if (err)
		goto err_dev_free;

	/* Handle umasking inside the fuse code */
	if (sb->s_flags & SB_POSIXACL)
		fc->dont_mask = 1;
	sb->s_flags |= SB_POSIXACL;

	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
	fc->legacy_opts_show = ctx->legacy_opts_show;
	fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
	fc->destroy = ctx->destroy;
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;

	err = -ENOMEM;
	root = fuse_get_root_inode(sb, ctx->rootmode);
	sb->s_d_op = &fuse_root_dentry_operations;
	root_dentry = d_make_root(root);
	if (!root_dentry)
		goto err_dev_free;
	/* Root dentry doesn't have .d_revalidate */
	sb->s_d_op = &fuse_dentry_operations;

	mutex_lock(&fuse_mutex);
	err = -EINVAL;
	/* a device already bound to another mount is an error */
	if (ctx->fudptr && *ctx->fudptr)
		goto err_unlock;

	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
	sb->s_root = root_dentry;
	if (ctx->fudptr)
		*ctx->fudptr = fud;
	mutex_unlock(&fuse_mutex);
	return 0;

 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
	if (fud)
		fuse_dev_free(fud);
 err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1695
/*
 * fill_super callback for regular (/dev/fuse based) mounts: validates
 * the mandatory options, binds the superblock to the opened device
 * file and kicks off the INIT handshake.
 */
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	int err;

	if (!ctx->file || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((ctx->file->f_op != &fuse_dev_operations) ||
	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
		return -EINVAL;
	ctx->fudptr = &ctx->file->private_data;

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		return err;
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
	fuse_send_init(get_fuse_mount_super(sb));
	return 0;
}
1722
/*
 * This is the path where user supplied an already initialized fuse dev.  In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	/* sget_fc() "set" callback that refuses to set up a new super */
	return -ENOTCONN;
}
1731
1732 static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
1733 {
1734
1735         return fsc->sget_key == get_fuse_conn_super(sb);
1736 }
1737
/*
 * ->get_tree() for fuse/fuseblk: obtain the superblock for a mount.
 *
 * A tentative fuse_conn/fuse_mount pair is set up first; depending on
 * the context the mount then goes through the block-device path, reuses
 * an already initialized /dev/fuse connection, or creates a fresh nodev
 * superblock.  If fsc->s_fs_info is still set on exit, the superblock
 * did not take ownership of the tentative pair, so it is destroyed here.
 */
static int fuse_get_tree(struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct fuse_dev *fud;
	struct fuse_conn *fc;
	struct fuse_mount *fm;
	struct super_block *sb;
	int err;

	/* Tentative connection + mount; ownership may pass to the super. */
	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	if (!fc)
		return -ENOMEM;

	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		return -ENOMEM;
	}

	fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
	fc->release = fuse_free_conn;

	fsc->s_fs_info = fm;

	/* Takes a reference on the file; dropped via fput() below. */
	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);

	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out;
	}
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	err = -EINVAL;
	if (!ctx->file)
		goto out;

	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
out:
	/* Still set: the super never took ownership of fm/fc. */
	if (fsc->s_fs_info)
		fuse_mount_destroy(fm);
	if (ctx->file)
		fput(ctx->file);
	return err;
}
1798
/* Mount-context operations shared by the "fuse" and "fuseblk" types. */
static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fsc,
	.parse_param	= fuse_parse_param,
	.reconfigure	= fuse_reconfigure,
	.get_tree	= fuse_get_tree,
};
1805
1806 /*
1807  * Set up the filesystem mount context.
1808  */
1809 static int fuse_init_fs_context(struct fs_context *fsc)
1810 {
1811         struct fuse_fs_context *ctx;
1812
1813         ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1814         if (!ctx)
1815                 return -ENOMEM;
1816
1817         ctx->max_read = ~0;
1818         ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1819         ctx->legacy_opts_show = true;
1820
1821 #ifdef CONFIG_BLOCK
1822         if (fsc->fs_type == &fuseblk_fs_type) {
1823                 ctx->is_bdev = true;
1824                 ctx->destroy = true;
1825         }
1826 #endif
1827
1828         fsc->fs_private = ctx;
1829         fsc->ops = &fuse_context_ops;
1830         return 0;
1831 }
1832
1833 bool fuse_mount_remove(struct fuse_mount *fm)
1834 {
1835         struct fuse_conn *fc = fm->fc;
1836         bool last = false;
1837
1838         down_write(&fc->killsb);
1839         list_del_init(&fm->fc_entry);
1840         if (list_empty(&fc->mounts))
1841                 last = true;
1842         up_write(&fc->killsb);
1843
1844         return last;
1845 }
1846 EXPORT_SYMBOL_GPL(fuse_mount_remove);
1847
/*
 * Shut down a fuse connection: optionally tell userspace via DESTROY,
 * abort and drain all outstanding requests, then unhash the connection
 * from the global list and the fuse control filesystem.
 */
void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;

	if (fc->destroy)
		fuse_send_destroy(fm);

	/* Force-abort everything and wait until all requests are done. */
	fuse_abort_conn(fc);
	fuse_wait_aborted(fc);

	/* Non-empty entry means the conn was hashed on fuse_conn_list. */
	if (!list_empty(&fc->entry)) {
		mutex_lock(&fuse_mutex);
		list_del(&fc->entry);
		fuse_ctl_remove_conn(fc);
		mutex_unlock(&fuse_mutex);
	}
}
EXPORT_SYMBOL_GPL(fuse_conn_destroy);
1866
1867 static void fuse_sb_destroy(struct super_block *sb)
1868 {
1869         struct fuse_mount *fm = get_fuse_mount_super(sb);
1870         bool last;
1871
1872         if (sb->s_root) {
1873                 last = fuse_mount_remove(fm);
1874                 if (last)
1875                         fuse_conn_destroy(fm);
1876         }
1877 }
1878
/*
 * Free a fuse_mount: drop its reference on the connection and release
 * the mount structure itself.
 */
void fuse_mount_destroy(struct fuse_mount *fm)
{
	fuse_conn_put(fm->fc);
	kfree(fm);
}
EXPORT_SYMBOL(fuse_mount_destroy);
1885
/*
 * ->kill_sb() for "fuse": tear down the connection state first, then the
 * generic anon superblock, and finally the fuse_mount itself.
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}
1892
/* The regular, non-device-backed "fuse" filesystem type. */
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");
1902
#ifdef CONFIG_BLOCK
/*
 * ->kill_sb() for "fuseblk": same sequence as the anon variant, but the
 * superblock is backed by a block device.
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
	fuse_mount_destroy(get_fuse_mount_super(sb));
}

/* Block-device-backed variant of the fuse filesystem type. */
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
MODULE_ALIAS_FS("fuseblk");

static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Without CONFIG_BLOCK, "fuseblk" registration is a no-op. */
static inline int register_fuseblk(void)
{
	return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif
1940
/*
 * Slab constructor for the fuse inode cache: one-time initialization of
 * the VFS inode embedded in struct fuse_inode.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once((struct inode *) foo);
}
1947
/*
 * Create the fuse inode slab cache and register both filesystem types.
 * On any failure, everything set up so far is unwound in reverse order.
 */
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	return 0;

 out3:
	unregister_fuseblk();
 out2:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}
1977
/* Reverse of fuse_fs_init(): unregister both types, then free the cache. */
static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
	unregister_fuseblk();

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(fuse_inode_cachep);
}
1990
/* /sys/fs/fuse kobject; parent of the "connections" mount point below. */
static struct kobject *fuse_kobj;
1992
1993 static int fuse_sysfs_init(void)
1994 {
1995         int err;
1996
1997         fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1998         if (!fuse_kobj) {
1999                 err = -ENOMEM;
2000                 goto out_err;
2001         }
2002
2003         err = sysfs_create_mount_point(fuse_kobj, "connections");
2004         if (err)
2005                 goto out_fuse_unregister;
2006
2007         return 0;
2008
2009  out_fuse_unregister:
2010         kobject_put(fuse_kobj);
2011  out_err:
2012         return err;
2013 }
2014
/* Reverse of fuse_sysfs_init(): remove the mount point, drop the kobject. */
static void fuse_sysfs_cleanup(void)
{
	sysfs_remove_mount_point(fuse_kobj, "connections");
	kobject_put(fuse_kobj);
}
2020
/*
 * Module init: bring up the filesystem types, /dev/fuse, sysfs entries
 * and the fuse control filesystem, in that order; unwind in reverse on
 * failure.  Finally clamp the module-parameter global limits.
 */
static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

	/* Bring the user-settable global limits into their valid range. */
	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return res;
}
2059
/* Module exit: tear everything down in reverse order of fuse_init(). */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}
2069
/* Module entry and exit points. */
module_init(fuse_init);
module_exit(fuse_exit);