GNU Linux-libre 6.6.31-gnu
[releases.git] / fs / fuse / inode.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/pagemap.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/seq_file.h>
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/fs_context.h>
19 #include <linux/fs_parser.h>
20 #include <linux/statfs.h>
21 #include <linux/random.h>
22 #include <linux/sched.h>
23 #include <linux/exportfs.h>
24 #include <linux/posix_acl.h>
25 #include <linux/pid_namespace.h>
26 #include <uapi/linux/magic.h>
27
28 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
29 MODULE_DESCRIPTION("Filesystem in Userspace");
30 MODULE_LICENSE("GPL");
31
32 static struct kmem_cache *fuse_inode_cachep;
33 struct list_head fuse_conn_list;
34 DEFINE_MUTEX(fuse_mutex);
35
36 static int set_global_limit(const char *val, const struct kernel_param *kp);
37
38 unsigned max_user_bgreq;
39 module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
40                   &max_user_bgreq, 0644);
41 __MODULE_PARM_TYPE(max_user_bgreq, "uint");
42 MODULE_PARM_DESC(max_user_bgreq,
43  "Global limit for the maximum number of backgrounded requests an "
44  "unprivileged user can set");
45
46 unsigned max_user_congthresh;
47 module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
48                   &max_user_congthresh, 0644);
49 __MODULE_PARM_TYPE(max_user_congthresh, "uint");
50 MODULE_PARM_DESC(max_user_congthresh,
51  "Global limit for the maximum congestion threshold an "
52  "unprivileged user can set");
53
/* Block size that fuse_show_options() treats as the unremarkable default */
#define FUSE_DEFAULT_BLKSIZE 512

/* Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/* Congestion starts at 75% of maximum (integer arithmetic: 12 * 3 / 4 = 9) */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD \
	(FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

#ifdef CONFIG_BLOCK
/* Tentative definition; only present when block device support is built */
static struct file_system_type fuseblk_fs_type;
#endif
65
66 struct fuse_forget_link *fuse_alloc_forget(void)
67 {
68         return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
69 }
70
71 static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void)
72 {
73         struct fuse_submount_lookup *sl;
74
75         sl = kzalloc(sizeof(struct fuse_submount_lookup), GFP_KERNEL_ACCOUNT);
76         if (!sl)
77                 return NULL;
78         sl->forget = fuse_alloc_forget();
79         if (!sl->forget)
80                 goto out_free;
81
82         return sl;
83
84 out_free:
85         kfree(sl);
86         return NULL;
87 }
88
89 static struct inode *fuse_alloc_inode(struct super_block *sb)
90 {
91         struct fuse_inode *fi;
92
93         fi = alloc_inode_sb(sb, fuse_inode_cachep, GFP_KERNEL);
94         if (!fi)
95                 return NULL;
96
97         fi->i_time = 0;
98         fi->inval_mask = ~0;
99         fi->nodeid = 0;
100         fi->nlookup = 0;
101         fi->attr_version = 0;
102         fi->orig_ino = 0;
103         fi->state = 0;
104         fi->submount_lookup = NULL;
105         mutex_init(&fi->mutex);
106         spin_lock_init(&fi->lock);
107         fi->forget = fuse_alloc_forget();
108         if (!fi->forget)
109                 goto out_free;
110
111         if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
112                 goto out_free_forget;
113
114         return &fi->inode;
115
116 out_free_forget:
117         kfree(fi->forget);
118 out_free:
119         kmem_cache_free(fuse_inode_cachep, fi);
120         return NULL;
121 }
122
123 static void fuse_free_inode(struct inode *inode)
124 {
125         struct fuse_inode *fi = get_fuse_inode(inode);
126
127         mutex_destroy(&fi->mutex);
128         kfree(fi->forget);
129 #ifdef CONFIG_FUSE_DAX
130         kfree(fi->dax);
131 #endif
132         kmem_cache_free(fuse_inode_cachep, fi);
133 }
134
135 static void fuse_cleanup_submount_lookup(struct fuse_conn *fc,
136                                          struct fuse_submount_lookup *sl)
137 {
138         if (!refcount_dec_and_test(&sl->count))
139                 return;
140
141         fuse_queue_forget(fc, sl->forget, sl->nodeid, 1);
142         sl->forget = NULL;
143         kfree(sl);
144 }
145
146 static void fuse_evict_inode(struct inode *inode)
147 {
148         struct fuse_inode *fi = get_fuse_inode(inode);
149
150         /* Will write inode on close/munmap and in all other dirtiers */
151         WARN_ON(inode->i_state & I_DIRTY_INODE);
152
153         truncate_inode_pages_final(&inode->i_data);
154         clear_inode(inode);
155         if (inode->i_sb->s_flags & SB_ACTIVE) {
156                 struct fuse_conn *fc = get_fuse_conn(inode);
157
158                 if (FUSE_IS_DAX(inode))
159                         fuse_dax_inode_cleanup(inode);
160                 if (fi->nlookup) {
161                         fuse_queue_forget(fc, fi->forget, fi->nodeid,
162                                           fi->nlookup);
163                         fi->forget = NULL;
164                 }
165
166                 if (fi->submount_lookup) {
167                         fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
168                         fi->submount_lookup = NULL;
169                 }
170         }
171         if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
172                 WARN_ON(!list_empty(&fi->write_files));
173                 WARN_ON(!list_empty(&fi->queued_writes));
174         }
175 }
176
177 static int fuse_reconfigure(struct fs_context *fsc)
178 {
179         struct super_block *sb = fsc->root->d_sb;
180
181         sync_filesystem(sb);
182         if (fsc->sb_flags & SB_MANDLOCK)
183                 return -EINVAL;
184
185         return 0;
186 }
187
188 /*
189  * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
190  * so that it will fit.
191  */
192 static ino_t fuse_squash_ino(u64 ino64)
193 {
194         ino_t ino = (ino_t) ino64;
195         if (sizeof(ino_t) < sizeof(u64))
196                 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
197         return ino;
198 }
199
200 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
201                                    struct fuse_statx *sx,
202                                    u64 attr_valid, u32 cache_mask)
203 {
204         struct fuse_conn *fc = get_fuse_conn(inode);
205         struct fuse_inode *fi = get_fuse_inode(inode);
206
207         lockdep_assert_held(&fi->lock);
208
209         fi->attr_version = atomic64_inc_return(&fc->attr_version);
210         fi->i_time = attr_valid;
211         /* Clear basic stats from invalid mask */
212         set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
213
214         inode->i_ino     = fuse_squash_ino(attr->ino);
215         inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
216         set_nlink(inode, attr->nlink);
217         inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
218         inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
219         inode->i_blocks  = attr->blocks;
220
221         /* Sanitize nsecs */
222         attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
223         attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
224         attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);
225
226         inode->i_atime.tv_sec   = attr->atime;
227         inode->i_atime.tv_nsec  = attr->atimensec;
228         /* mtime from server may be stale due to local buffered write */
229         if (!(cache_mask & STATX_MTIME)) {
230                 inode->i_mtime.tv_sec   = attr->mtime;
231                 inode->i_mtime.tv_nsec  = attr->mtimensec;
232         }
233         if (!(cache_mask & STATX_CTIME)) {
234                 inode_set_ctime(inode, attr->ctime, attr->ctimensec);
235         }
236         if (sx) {
237                 /* Sanitize nsecs */
238                 sx->btime.tv_nsec =
239                         min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
240
241                 /*
242                  * Btime has been queried, cache is valid (whether or not btime
243                  * is available or not) so clear STATX_BTIME from inval_mask.
244                  *
245                  * Availability of the btime attribute is indicated in
246                  * FUSE_I_BTIME
247                  */
248                 set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
249                 if (sx->mask & STATX_BTIME) {
250                         set_bit(FUSE_I_BTIME, &fi->state);
251                         fi->i_btime.tv_sec = sx->btime.tv_sec;
252                         fi->i_btime.tv_nsec = sx->btime.tv_nsec;
253                 }
254         }
255
256         if (attr->blksize != 0)
257                 inode->i_blkbits = ilog2(attr->blksize);
258         else
259                 inode->i_blkbits = inode->i_sb->s_blocksize_bits;
260
261         /*
262          * Don't set the sticky bit in i_mode, unless we want the VFS
263          * to check permissions.  This prevents failures due to the
264          * check in may_delete().
265          */
266         fi->orig_i_mode = inode->i_mode;
267         if (!fc->default_permissions)
268                 inode->i_mode &= ~S_ISVTX;
269
270         fi->orig_ino = attr->ino;
271
272         /*
273          * We are refreshing inode data and it is possible that another
274          * client set suid/sgid or security.capability xattr. So clear
275          * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
276          * was set or if security.capability xattr was set. But we don't
277          * know if security.capability has been set or not. So clear it
278          * anyway. Its less efficient but should be safe.
279          */
280         inode->i_flags &= ~S_NOSEC;
281 }
282
283 u32 fuse_get_cache_mask(struct inode *inode)
284 {
285         struct fuse_conn *fc = get_fuse_conn(inode);
286
287         if (!fc->writeback_cache || !S_ISREG(inode->i_mode))
288                 return 0;
289
290         return STATX_MTIME | STATX_CTIME | STATX_SIZE;
291 }
292
293 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
294                             struct fuse_statx *sx,
295                             u64 attr_valid, u64 attr_version)
296 {
297         struct fuse_conn *fc = get_fuse_conn(inode);
298         struct fuse_inode *fi = get_fuse_inode(inode);
299         u32 cache_mask;
300         loff_t oldsize;
301         struct timespec64 old_mtime;
302
303         spin_lock(&fi->lock);
304         /*
305          * In case of writeback_cache enabled, writes update mtime, ctime and
306          * may update i_size.  In these cases trust the cached value in the
307          * inode.
308          */
309         cache_mask = fuse_get_cache_mask(inode);
310         if (cache_mask & STATX_SIZE)
311                 attr->size = i_size_read(inode);
312
313         if (cache_mask & STATX_MTIME) {
314                 attr->mtime = inode->i_mtime.tv_sec;
315                 attr->mtimensec = inode->i_mtime.tv_nsec;
316         }
317         if (cache_mask & STATX_CTIME) {
318                 attr->ctime = inode_get_ctime(inode).tv_sec;
319                 attr->ctimensec = inode_get_ctime(inode).tv_nsec;
320         }
321
322         if ((attr_version != 0 && fi->attr_version > attr_version) ||
323             test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
324                 spin_unlock(&fi->lock);
325                 return;
326         }
327
328         old_mtime = inode->i_mtime;
329         fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
330
331         oldsize = inode->i_size;
332         /*
333          * In case of writeback_cache enabled, the cached writes beyond EOF
334          * extend local i_size without keeping userspace server in sync. So,
335          * attr->size coming from server can be stale. We cannot trust it.
336          */
337         if (!(cache_mask & STATX_SIZE))
338                 i_size_write(inode, attr->size);
339         spin_unlock(&fi->lock);
340
341         if (!cache_mask && S_ISREG(inode->i_mode)) {
342                 bool inval = false;
343
344                 if (oldsize != attr->size) {
345                         truncate_pagecache(inode, attr->size);
346                         if (!fc->explicit_inval_data)
347                                 inval = true;
348                 } else if (fc->auto_inval_data) {
349                         struct timespec64 new_mtime = {
350                                 .tv_sec = attr->mtime,
351                                 .tv_nsec = attr->mtimensec,
352                         };
353
354                         /*
355                          * Auto inval mode also checks and invalidates if mtime
356                          * has changed.
357                          */
358                         if (!timespec64_equal(&old_mtime, &new_mtime))
359                                 inval = true;
360                 }
361
362                 if (inval)
363                         invalidate_inode_pages2(inode->i_mapping);
364         }
365
366         if (IS_ENABLED(CONFIG_FUSE_DAX))
367                 fuse_dax_dontcache(inode, attr->flags);
368 }
369
370 static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
371                                       u64 nodeid)
372 {
373         sl->nodeid = nodeid;
374         refcount_set(&sl->count, 1);
375 }
376
377 static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
378                             struct fuse_conn *fc)
379 {
380         inode->i_mode = attr->mode & S_IFMT;
381         inode->i_size = attr->size;
382         inode->i_mtime.tv_sec  = attr->mtime;
383         inode->i_mtime.tv_nsec = attr->mtimensec;
384         inode_set_ctime(inode, attr->ctime, attr->ctimensec);
385         if (S_ISREG(inode->i_mode)) {
386                 fuse_init_common(inode);
387                 fuse_init_file_inode(inode, attr->flags);
388         } else if (S_ISDIR(inode->i_mode))
389                 fuse_init_dir(inode);
390         else if (S_ISLNK(inode->i_mode))
391                 fuse_init_symlink(inode);
392         else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
393                  S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
394                 fuse_init_common(inode);
395                 init_special_inode(inode, inode->i_mode,
396                                    new_decode_dev(attr->rdev));
397         } else
398                 BUG();
399         /*
400          * Ensure that we don't cache acls for daemons without FUSE_POSIX_ACL
401          * so they see the exact same behavior as before.
402          */
403         if (!fc->posix_acl)
404                 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
405 }
406
407 static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
408 {
409         u64 nodeid = *(u64 *) _nodeidp;
410         if (get_node_id(inode) == nodeid)
411                 return 1;
412         else
413                 return 0;
414 }
415
416 static int fuse_inode_set(struct inode *inode, void *_nodeidp)
417 {
418         u64 nodeid = *(u64 *) _nodeidp;
419         get_fuse_inode(inode)->nodeid = nodeid;
420         return 0;
421 }
422
423 struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
424                         int generation, struct fuse_attr *attr,
425                         u64 attr_valid, u64 attr_version)
426 {
427         struct inode *inode;
428         struct fuse_inode *fi;
429         struct fuse_conn *fc = get_fuse_conn_super(sb);
430
431         /*
432          * Auto mount points get their node id from the submount root, which is
433          * not a unique identifier within this filesystem.
434          *
435          * To avoid conflicts, do not place submount points into the inode hash
436          * table.
437          */
438         if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
439             S_ISDIR(attr->mode)) {
440                 struct fuse_inode *fi;
441
442                 inode = new_inode(sb);
443                 if (!inode)
444                         return NULL;
445
446                 fuse_init_inode(inode, attr, fc);
447                 fi = get_fuse_inode(inode);
448                 fi->nodeid = nodeid;
449                 fi->submount_lookup = fuse_alloc_submount_lookup();
450                 if (!fi->submount_lookup) {
451                         iput(inode);
452                         return NULL;
453                 }
454                 /* Sets nlookup = 1 on fi->submount_lookup->nlookup */
455                 fuse_init_submount_lookup(fi->submount_lookup, nodeid);
456                 inode->i_flags |= S_AUTOMOUNT;
457                 goto done;
458         }
459
460 retry:
461         inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
462         if (!inode)
463                 return NULL;
464
465         if ((inode->i_state & I_NEW)) {
466                 inode->i_flags |= S_NOATIME;
467                 if (!fc->writeback_cache || !S_ISREG(attr->mode))
468                         inode->i_flags |= S_NOCMTIME;
469                 inode->i_generation = generation;
470                 fuse_init_inode(inode, attr, fc);
471                 unlock_new_inode(inode);
472         } else if (fuse_stale_inode(inode, generation, attr)) {
473                 /* nodeid was reused, any I/O on the old inode should fail */
474                 fuse_make_bad(inode);
475                 if (inode != d_inode(sb->s_root)) {
476                         remove_inode_hash(inode);
477                         iput(inode);
478                         goto retry;
479                 }
480         }
481         fi = get_fuse_inode(inode);
482         spin_lock(&fi->lock);
483         fi->nlookup++;
484         spin_unlock(&fi->lock);
485 done:
486         fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
487
488         return inode;
489 }
490
491 struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
492                            struct fuse_mount **fm)
493 {
494         struct fuse_mount *fm_iter;
495         struct inode *inode;
496
497         WARN_ON(!rwsem_is_locked(&fc->killsb));
498         list_for_each_entry(fm_iter, &fc->mounts, fc_entry) {
499                 if (!fm_iter->sb)
500                         continue;
501
502                 inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid);
503                 if (inode) {
504                         if (fm)
505                                 *fm = fm_iter;
506                         return inode;
507                 }
508         }
509
510         return NULL;
511 }
512
513 int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
514                              loff_t offset, loff_t len)
515 {
516         struct fuse_inode *fi;
517         struct inode *inode;
518         pgoff_t pg_start;
519         pgoff_t pg_end;
520
521         inode = fuse_ilookup(fc, nodeid, NULL);
522         if (!inode)
523                 return -ENOENT;
524
525         fi = get_fuse_inode(inode);
526         spin_lock(&fi->lock);
527         fi->attr_version = atomic64_inc_return(&fc->attr_version);
528         spin_unlock(&fi->lock);
529
530         fuse_invalidate_attr(inode);
531         forget_all_cached_acls(inode);
532         if (offset >= 0) {
533                 pg_start = offset >> PAGE_SHIFT;
534                 if (len <= 0)
535                         pg_end = -1;
536                 else
537                         pg_end = (offset + len - 1) >> PAGE_SHIFT;
538                 invalidate_inode_pages2_range(inode->i_mapping,
539                                               pg_start, pg_end);
540         }
541         iput(inode);
542         return 0;
543 }
544
545 bool fuse_lock_inode(struct inode *inode)
546 {
547         bool locked = false;
548
549         if (!get_fuse_conn(inode)->parallel_dirops) {
550                 mutex_lock(&get_fuse_inode(inode)->mutex);
551                 locked = true;
552         }
553
554         return locked;
555 }
556
557 void fuse_unlock_inode(struct inode *inode, bool locked)
558 {
559         if (locked)
560                 mutex_unlock(&get_fuse_inode(inode)->mutex);
561 }
562
563 static void fuse_umount_begin(struct super_block *sb)
564 {
565         struct fuse_conn *fc = get_fuse_conn_super(sb);
566
567         if (fc->no_force_umount)
568                 return;
569
570         fuse_abort_conn(fc);
571
572         // Only retire block-device-based superblocks.
573         if (sb->s_bdev != NULL)
574                 retire_super(sb);
575 }
576
577 static void fuse_send_destroy(struct fuse_mount *fm)
578 {
579         if (fm->fc->conn_init) {
580                 FUSE_ARGS(args);
581
582                 args.opcode = FUSE_DESTROY;
583                 args.force = true;
584                 args.nocreds = true;
585                 fuse_simple_request(fm, &args);
586         }
587 }
588
589 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
590 {
591         stbuf->f_type    = FUSE_SUPER_MAGIC;
592         stbuf->f_bsize   = attr->bsize;
593         stbuf->f_frsize  = attr->frsize;
594         stbuf->f_blocks  = attr->blocks;
595         stbuf->f_bfree   = attr->bfree;
596         stbuf->f_bavail  = attr->bavail;
597         stbuf->f_files   = attr->files;
598         stbuf->f_ffree   = attr->ffree;
599         stbuf->f_namelen = attr->namelen;
600         /* fsid is left zero */
601 }
602
603 static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
604 {
605         struct super_block *sb = dentry->d_sb;
606         struct fuse_mount *fm = get_fuse_mount_super(sb);
607         FUSE_ARGS(args);
608         struct fuse_statfs_out outarg;
609         int err;
610
611         if (!fuse_allow_current_process(fm->fc)) {
612                 buf->f_type = FUSE_SUPER_MAGIC;
613                 return 0;
614         }
615
616         memset(&outarg, 0, sizeof(outarg));
617         args.in_numargs = 0;
618         args.opcode = FUSE_STATFS;
619         args.nodeid = get_node_id(d_inode(dentry));
620         args.out_numargs = 1;
621         args.out_args[0].size = sizeof(outarg);
622         args.out_args[0].value = &outarg;
623         err = fuse_simple_request(fm, &args);
624         if (!err)
625                 convert_fuse_statfs(buf, &outarg.st);
626         return err;
627 }
628
629 static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
630 {
631         struct fuse_sync_bucket *bucket;
632
633         bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
634         if (bucket) {
635                 init_waitqueue_head(&bucket->waitq);
636                 /* Initial active count */
637                 atomic_set(&bucket->count, 1);
638         }
639         return bucket;
640 }
641
642 static void fuse_sync_fs_writes(struct fuse_conn *fc)
643 {
644         struct fuse_sync_bucket *bucket, *new_bucket;
645         int count;
646
647         new_bucket = fuse_sync_bucket_alloc();
648         spin_lock(&fc->lock);
649         bucket = rcu_dereference_protected(fc->curr_bucket, 1);
650         count = atomic_read(&bucket->count);
651         WARN_ON(count < 1);
652         /* No outstanding writes? */
653         if (count == 1) {
654                 spin_unlock(&fc->lock);
655                 kfree(new_bucket);
656                 return;
657         }
658
659         /*
660          * Completion of new bucket depends on completion of this bucket, so add
661          * one more count.
662          */
663         atomic_inc(&new_bucket->count);
664         rcu_assign_pointer(fc->curr_bucket, new_bucket);
665         spin_unlock(&fc->lock);
666         /*
667          * Drop initial active count.  At this point if all writes in this and
668          * ancestor buckets complete, the count will go to zero and this task
669          * will be woken up.
670          */
671         atomic_dec(&bucket->count);
672
673         wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
674
675         /* Drop temp count on descendant bucket */
676         fuse_sync_bucket_dec(new_bucket);
677         kfree_rcu(bucket, rcu);
678 }
679
680 static int fuse_sync_fs(struct super_block *sb, int wait)
681 {
682         struct fuse_mount *fm = get_fuse_mount_super(sb);
683         struct fuse_conn *fc = fm->fc;
684         struct fuse_syncfs_in inarg;
685         FUSE_ARGS(args);
686         int err;
687
688         /*
689          * Userspace cannot handle the wait == 0 case.  Avoid a
690          * gratuitous roundtrip.
691          */
692         if (!wait)
693                 return 0;
694
695         /* The filesystem is being unmounted.  Nothing to do. */
696         if (!sb->s_root)
697                 return 0;
698
699         if (!fc->sync_fs)
700                 return 0;
701
702         fuse_sync_fs_writes(fc);
703
704         memset(&inarg, 0, sizeof(inarg));
705         args.in_numargs = 1;
706         args.in_args[0].size = sizeof(inarg);
707         args.in_args[0].value = &inarg;
708         args.opcode = FUSE_SYNCFS;
709         args.nodeid = get_node_id(sb->s_root->d_inode);
710         args.out_numargs = 0;
711
712         err = fuse_simple_request(fm, &args);
713         if (err == -ENOSYS) {
714                 fc->sync_fs = 0;
715                 err = 0;
716         }
717
718         return err;
719 }
720
/* Identifiers for the mount options handled by fuse_parse_param(). */
enum {
	OPT_SOURCE,			/* "source" */
	OPT_SUBTYPE,			/* "subtype" */
	OPT_FD,				/* "fd" */
	OPT_ROOTMODE,			/* "rootmode" */
	OPT_USER_ID,			/* "user_id" */
	OPT_GROUP_ID,			/* "group_id" */
	OPT_DEFAULT_PERMISSIONS,	/* "default_permissions" */
	OPT_ALLOW_OTHER,		/* "allow_other" */
	OPT_MAX_READ,			/* "max_read" */
	OPT_BLKSIZE,			/* "blksize" */
	OPT_ERR,			/* not bound to any option name */
};
734
735 static const struct fs_parameter_spec fuse_fs_parameters[] = {
736         fsparam_string  ("source",              OPT_SOURCE),
737         fsparam_u32     ("fd",                  OPT_FD),
738         fsparam_u32oct  ("rootmode",            OPT_ROOTMODE),
739         fsparam_u32     ("user_id",             OPT_USER_ID),
740         fsparam_u32     ("group_id",            OPT_GROUP_ID),
741         fsparam_flag    ("default_permissions", OPT_DEFAULT_PERMISSIONS),
742         fsparam_flag    ("allow_other",         OPT_ALLOW_OTHER),
743         fsparam_u32     ("max_read",            OPT_MAX_READ),
744         fsparam_u32     ("blksize",             OPT_BLKSIZE),
745         fsparam_string  ("subtype",             OPT_SUBTYPE),
746         {}
747 };
748
749 static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
750 {
751         struct fs_parse_result result;
752         struct fuse_fs_context *ctx = fsc->fs_private;
753         int opt;
754
755         if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
756                 /*
757                  * Ignore options coming from mount(MS_REMOUNT) for backward
758                  * compatibility.
759                  */
760                 if (fsc->oldapi)
761                         return 0;
762
763                 return invalfc(fsc, "No changes allowed in reconfigure");
764         }
765
766         opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
767         if (opt < 0)
768                 return opt;
769
770         switch (opt) {
771         case OPT_SOURCE:
772                 if (fsc->source)
773                         return invalfc(fsc, "Multiple sources specified");
774                 fsc->source = param->string;
775                 param->string = NULL;
776                 break;
777
778         case OPT_SUBTYPE:
779                 if (ctx->subtype)
780                         return invalfc(fsc, "Multiple subtypes specified");
781                 ctx->subtype = param->string;
782                 param->string = NULL;
783                 return 0;
784
785         case OPT_FD:
786                 ctx->fd = result.uint_32;
787                 ctx->fd_present = true;
788                 break;
789
790         case OPT_ROOTMODE:
791                 if (!fuse_valid_type(result.uint_32))
792                         return invalfc(fsc, "Invalid rootmode");
793                 ctx->rootmode = result.uint_32;
794                 ctx->rootmode_present = true;
795                 break;
796
797         case OPT_USER_ID:
798                 ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
799                 if (!uid_valid(ctx->user_id))
800                         return invalfc(fsc, "Invalid user_id");
801                 ctx->user_id_present = true;
802                 break;
803
804         case OPT_GROUP_ID:
805                 ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
806                 if (!gid_valid(ctx->group_id))
807                         return invalfc(fsc, "Invalid group_id");
808                 ctx->group_id_present = true;
809                 break;
810
811         case OPT_DEFAULT_PERMISSIONS:
812                 ctx->default_permissions = true;
813                 break;
814
815         case OPT_ALLOW_OTHER:
816                 ctx->allow_other = true;
817                 break;
818
819         case OPT_MAX_READ:
820                 ctx->max_read = result.uint_32;
821                 break;
822
823         case OPT_BLKSIZE:
824                 if (!ctx->is_bdev)
825                         return invalfc(fsc, "blksize only supported for fuseblk");
826                 ctx->blksize = result.uint_32;
827                 break;
828
829         default:
830                 return -EINVAL;
831         }
832
833         return 0;
834 }
835
836 static void fuse_free_fsc(struct fs_context *fsc)
837 {
838         struct fuse_fs_context *ctx = fsc->fs_private;
839
840         if (ctx) {
841                 kfree(ctx->subtype);
842                 kfree(ctx);
843         }
844 }
845
846 static int fuse_show_options(struct seq_file *m, struct dentry *root)
847 {
848         struct super_block *sb = root->d_sb;
849         struct fuse_conn *fc = get_fuse_conn_super(sb);
850
851         if (fc->legacy_opts_show) {
852                 seq_printf(m, ",user_id=%u",
853                            from_kuid_munged(fc->user_ns, fc->user_id));
854                 seq_printf(m, ",group_id=%u",
855                            from_kgid_munged(fc->user_ns, fc->group_id));
856                 if (fc->default_permissions)
857                         seq_puts(m, ",default_permissions");
858                 if (fc->allow_other)
859                         seq_puts(m, ",allow_other");
860                 if (fc->max_read != ~0)
861                         seq_printf(m, ",max_read=%u", fc->max_read);
862                 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
863                         seq_printf(m, ",blksize=%lu", sb->s_blocksize);
864         }
865 #ifdef CONFIG_FUSE_DAX
866         if (fc->dax_mode == FUSE_DAX_ALWAYS)
867                 seq_puts(m, ",dax=always");
868         else if (fc->dax_mode == FUSE_DAX_NEVER)
869                 seq_puts(m, ",dax=never");
870         else if (fc->dax_mode == FUSE_DAX_INODE_USER)
871                 seq_puts(m, ",dax=inode");
872 #endif
873
874         return 0;
875 }
876
877 static void fuse_iqueue_init(struct fuse_iqueue *fiq,
878                              const struct fuse_iqueue_ops *ops,
879                              void *priv)
880 {
881         memset(fiq, 0, sizeof(struct fuse_iqueue));
882         spin_lock_init(&fiq->lock);
883         init_waitqueue_head(&fiq->waitq);
884         INIT_LIST_HEAD(&fiq->pending);
885         INIT_LIST_HEAD(&fiq->interrupts);
886         fiq->forget_list_tail = &fiq->forget_list_head;
887         fiq->connected = 1;
888         fiq->ops = ops;
889         fiq->priv = priv;
890 }
891
892 static void fuse_pqueue_init(struct fuse_pqueue *fpq)
893 {
894         unsigned int i;
895
896         spin_lock_init(&fpq->lock);
897         for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
898                 INIT_LIST_HEAD(&fpq->processing[i]);
899         INIT_LIST_HEAD(&fpq->io);
900         fpq->connected = 1;
901 }
902
903 void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
904                     struct user_namespace *user_ns,
905                     const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
906 {
907         memset(fc, 0, sizeof(*fc));
908         spin_lock_init(&fc->lock);
909         spin_lock_init(&fc->bg_lock);
910         init_rwsem(&fc->killsb);
911         refcount_set(&fc->count, 1);
912         atomic_set(&fc->dev_count, 1);
913         init_waitqueue_head(&fc->blocked_waitq);
914         fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
915         INIT_LIST_HEAD(&fc->bg_queue);
916         INIT_LIST_HEAD(&fc->entry);
917         INIT_LIST_HEAD(&fc->devices);
918         atomic_set(&fc->num_waiting, 0);
919         fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
920         fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
921         atomic64_set(&fc->khctr, 0);
922         fc->polled_files = RB_ROOT;
923         fc->blocked = 0;
924         fc->initialized = 0;
925         fc->connected = 1;
926         atomic64_set(&fc->attr_version, 1);
927         get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
928         fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
929         fc->user_ns = get_user_ns(user_ns);
930         fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
931         fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
932
933         INIT_LIST_HEAD(&fc->mounts);
934         list_add(&fm->fc_entry, &fc->mounts);
935         fm->fc = fc;
936 }
937 EXPORT_SYMBOL_GPL(fuse_conn_init);
938
939 void fuse_conn_put(struct fuse_conn *fc)
940 {
941         if (refcount_dec_and_test(&fc->count)) {
942                 struct fuse_iqueue *fiq = &fc->iq;
943                 struct fuse_sync_bucket *bucket;
944
945                 if (IS_ENABLED(CONFIG_FUSE_DAX))
946                         fuse_dax_conn_free(fc);
947                 if (fiq->ops->release)
948                         fiq->ops->release(fiq);
949                 put_pid_ns(fc->pid_ns);
950                 put_user_ns(fc->user_ns);
951                 bucket = rcu_dereference_protected(fc->curr_bucket, 1);
952                 if (bucket) {
953                         WARN_ON(atomic_read(&bucket->count) != 1);
954                         kfree(bucket);
955                 }
956                 fc->release(fc);
957         }
958 }
959 EXPORT_SYMBOL_GPL(fuse_conn_put);
960
961 struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
962 {
963         refcount_inc(&fc->count);
964         return fc;
965 }
966 EXPORT_SYMBOL_GPL(fuse_conn_get);
967
968 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
969 {
970         struct fuse_attr attr;
971         memset(&attr, 0, sizeof(attr));
972
973         attr.mode = mode;
974         attr.ino = FUSE_ROOT_ID;
975         attr.nlink = 1;
976         return fuse_iget(sb, 1, 0, &attr, 0, 0);
977 }
978
979 struct fuse_inode_handle {
980         u64 nodeid;
981         u32 generation;
982 };
983
984 static struct dentry *fuse_get_dentry(struct super_block *sb,
985                                       struct fuse_inode_handle *handle)
986 {
987         struct fuse_conn *fc = get_fuse_conn_super(sb);
988         struct inode *inode;
989         struct dentry *entry;
990         int err = -ESTALE;
991
992         if (handle->nodeid == 0)
993                 goto out_err;
994
995         inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
996         if (!inode) {
997                 struct fuse_entry_out outarg;
998                 const struct qstr name = QSTR_INIT(".", 1);
999
1000                 if (!fc->export_support)
1001                         goto out_err;
1002
1003                 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
1004                                        &inode);
1005                 if (err && err != -ENOENT)
1006                         goto out_err;
1007                 if (err || !inode) {
1008                         err = -ESTALE;
1009                         goto out_err;
1010                 }
1011                 err = -EIO;
1012                 if (get_node_id(inode) != handle->nodeid)
1013                         goto out_iput;
1014         }
1015         err = -ESTALE;
1016         if (inode->i_generation != handle->generation)
1017                 goto out_iput;
1018
1019         entry = d_obtain_alias(inode);
1020         if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
1021                 fuse_invalidate_entry_cache(entry);
1022
1023         return entry;
1024
1025  out_iput:
1026         iput(inode);
1027  out_err:
1028         return ERR_PTR(err);
1029 }
1030
1031 static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
1032                            struct inode *parent)
1033 {
1034         int len = parent ? 6 : 3;
1035         u64 nodeid;
1036         u32 generation;
1037
1038         if (*max_len < len) {
1039                 *max_len = len;
1040                 return  FILEID_INVALID;
1041         }
1042
1043         nodeid = get_fuse_inode(inode)->nodeid;
1044         generation = inode->i_generation;
1045
1046         fh[0] = (u32)(nodeid >> 32);
1047         fh[1] = (u32)(nodeid & 0xffffffff);
1048         fh[2] = generation;
1049
1050         if (parent) {
1051                 nodeid = get_fuse_inode(parent)->nodeid;
1052                 generation = parent->i_generation;
1053
1054                 fh[3] = (u32)(nodeid >> 32);
1055                 fh[4] = (u32)(nodeid & 0xffffffff);
1056                 fh[5] = generation;
1057         }
1058
1059         *max_len = len;
1060         return parent ? 0x82 : 0x81;
1061 }
1062
1063 static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
1064                 struct fid *fid, int fh_len, int fh_type)
1065 {
1066         struct fuse_inode_handle handle;
1067
1068         if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
1069                 return NULL;
1070
1071         handle.nodeid = (u64) fid->raw[0] << 32;
1072         handle.nodeid |= (u64) fid->raw[1];
1073         handle.generation = fid->raw[2];
1074         return fuse_get_dentry(sb, &handle);
1075 }
1076
1077 static struct dentry *fuse_fh_to_parent(struct super_block *sb,
1078                 struct fid *fid, int fh_len, int fh_type)
1079 {
1080         struct fuse_inode_handle parent;
1081
1082         if (fh_type != 0x82 || fh_len < 6)
1083                 return NULL;
1084
1085         parent.nodeid = (u64) fid->raw[3] << 32;
1086         parent.nodeid |= (u64) fid->raw[4];
1087         parent.generation = fid->raw[5];
1088         return fuse_get_dentry(sb, &parent);
1089 }
1090
1091 static struct dentry *fuse_get_parent(struct dentry *child)
1092 {
1093         struct inode *child_inode = d_inode(child);
1094         struct fuse_conn *fc = get_fuse_conn(child_inode);
1095         struct inode *inode;
1096         struct dentry *parent;
1097         struct fuse_entry_out outarg;
1098         int err;
1099
1100         if (!fc->export_support)
1101                 return ERR_PTR(-ESTALE);
1102
1103         err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
1104                                &dotdot_name, &outarg, &inode);
1105         if (err) {
1106                 if (err == -ENOENT)
1107                         return ERR_PTR(-ESTALE);
1108                 return ERR_PTR(err);
1109         }
1110
1111         parent = d_obtain_alias(inode);
1112         if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
1113                 fuse_invalidate_entry_cache(parent);
1114
1115         return parent;
1116 }
1117
1118 static const struct export_operations fuse_export_operations = {
1119         .fh_to_dentry   = fuse_fh_to_dentry,
1120         .fh_to_parent   = fuse_fh_to_parent,
1121         .encode_fh      = fuse_encode_fh,
1122         .get_parent     = fuse_get_parent,
1123 };
1124
1125 static const struct super_operations fuse_super_operations = {
1126         .alloc_inode    = fuse_alloc_inode,
1127         .free_inode     = fuse_free_inode,
1128         .evict_inode    = fuse_evict_inode,
1129         .write_inode    = fuse_write_inode,
1130         .drop_inode     = generic_delete_inode,
1131         .umount_begin   = fuse_umount_begin,
1132         .statfs         = fuse_statfs,
1133         .sync_fs        = fuse_sync_fs,
1134         .show_options   = fuse_show_options,
1135 };
1136
1137 static void sanitize_global_limit(unsigned *limit)
1138 {
1139         /*
1140          * The default maximum number of async requests is calculated to consume
1141          * 1/2^13 of the total memory, assuming 392 bytes per request.
1142          */
1143         if (*limit == 0)
1144                 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
1145
1146         if (*limit >= 1 << 16)
1147                 *limit = (1 << 16) - 1;
1148 }
1149
1150 static int set_global_limit(const char *val, const struct kernel_param *kp)
1151 {
1152         int rv;
1153
1154         rv = param_set_uint(val, kp);
1155         if (rv)
1156                 return rv;
1157
1158         sanitize_global_limit((unsigned *)kp->arg);
1159
1160         return 0;
1161 }
1162
1163 static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
1164 {
1165         int cap_sys_admin = capable(CAP_SYS_ADMIN);
1166
1167         if (arg->minor < 13)
1168                 return;
1169
1170         sanitize_global_limit(&max_user_bgreq);
1171         sanitize_global_limit(&max_user_congthresh);
1172
1173         spin_lock(&fc->bg_lock);
1174         if (arg->max_background) {
1175                 fc->max_background = arg->max_background;
1176
1177                 if (!cap_sys_admin && fc->max_background > max_user_bgreq)
1178                         fc->max_background = max_user_bgreq;
1179         }
1180         if (arg->congestion_threshold) {
1181                 fc->congestion_threshold = arg->congestion_threshold;
1182
1183                 if (!cap_sys_admin &&
1184                     fc->congestion_threshold > max_user_congthresh)
1185                         fc->congestion_threshold = max_user_congthresh;
1186         }
1187         spin_unlock(&fc->bg_lock);
1188 }
1189
1190 struct fuse_init_args {
1191         struct fuse_args args;
1192         struct fuse_init_in in;
1193         struct fuse_init_out out;
1194 };
1195
1196 static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
1197                                int error)
1198 {
1199         struct fuse_conn *fc = fm->fc;
1200         struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
1201         struct fuse_init_out *arg = &ia->out;
1202         bool ok = true;
1203
1204         if (error || arg->major != FUSE_KERNEL_VERSION)
1205                 ok = false;
1206         else {
1207                 unsigned long ra_pages;
1208
1209                 process_init_limits(fc, arg);
1210
1211                 if (arg->minor >= 6) {
1212                         u64 flags = arg->flags;
1213
1214                         if (flags & FUSE_INIT_EXT)
1215                                 flags |= (u64) arg->flags2 << 32;
1216
1217                         ra_pages = arg->max_readahead / PAGE_SIZE;
1218                         if (flags & FUSE_ASYNC_READ)
1219                                 fc->async_read = 1;
1220                         if (!(flags & FUSE_POSIX_LOCKS))
1221                                 fc->no_lock = 1;
1222                         if (arg->minor >= 17) {
1223                                 if (!(flags & FUSE_FLOCK_LOCKS))
1224                                         fc->no_flock = 1;
1225                         } else {
1226                                 if (!(flags & FUSE_POSIX_LOCKS))
1227                                         fc->no_flock = 1;
1228                         }
1229                         if (flags & FUSE_ATOMIC_O_TRUNC)
1230                                 fc->atomic_o_trunc = 1;
1231                         if (arg->minor >= 9) {
1232                                 /* LOOKUP has dependency on proto version */
1233                                 if (flags & FUSE_EXPORT_SUPPORT)
1234                                         fc->export_support = 1;
1235                         }
1236                         if (flags & FUSE_BIG_WRITES)
1237                                 fc->big_writes = 1;
1238                         if (flags & FUSE_DONT_MASK)
1239                                 fc->dont_mask = 1;
1240                         if (flags & FUSE_AUTO_INVAL_DATA)
1241                                 fc->auto_inval_data = 1;
1242                         else if (flags & FUSE_EXPLICIT_INVAL_DATA)
1243                                 fc->explicit_inval_data = 1;
1244                         if (flags & FUSE_DO_READDIRPLUS) {
1245                                 fc->do_readdirplus = 1;
1246                                 if (flags & FUSE_READDIRPLUS_AUTO)
1247                                         fc->readdirplus_auto = 1;
1248                         }
1249                         if (flags & FUSE_ASYNC_DIO)
1250                                 fc->async_dio = 1;
1251                         if (flags & FUSE_WRITEBACK_CACHE)
1252                                 fc->writeback_cache = 1;
1253                         if (flags & FUSE_PARALLEL_DIROPS)
1254                                 fc->parallel_dirops = 1;
1255                         if (flags & FUSE_HANDLE_KILLPRIV)
1256                                 fc->handle_killpriv = 1;
1257                         if (arg->time_gran && arg->time_gran <= 1000000000)
1258                                 fm->sb->s_time_gran = arg->time_gran;
1259                         if ((flags & FUSE_POSIX_ACL)) {
1260                                 fc->default_permissions = 1;
1261                                 fc->posix_acl = 1;
1262                         }
1263                         if (flags & FUSE_CACHE_SYMLINKS)
1264                                 fc->cache_symlinks = 1;
1265                         if (flags & FUSE_ABORT_ERROR)
1266                                 fc->abort_err = 1;
1267                         if (flags & FUSE_MAX_PAGES) {
1268                                 fc->max_pages =
1269                                         min_t(unsigned int, fc->max_pages_limit,
1270                                         max_t(unsigned int, arg->max_pages, 1));
1271                         }
1272                         if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1273                                 if (flags & FUSE_MAP_ALIGNMENT &&
1274                                     !fuse_dax_check_alignment(fc, arg->map_alignment)) {
1275                                         ok = false;
1276                                 }
1277                                 if (flags & FUSE_HAS_INODE_DAX)
1278                                         fc->inode_dax = 1;
1279                         }
1280                         if (flags & FUSE_HANDLE_KILLPRIV_V2) {
1281                                 fc->handle_killpriv_v2 = 1;
1282                                 fm->sb->s_flags |= SB_NOSEC;
1283                         }
1284                         if (flags & FUSE_SETXATTR_EXT)
1285                                 fc->setxattr_ext = 1;
1286                         if (flags & FUSE_SECURITY_CTX)
1287                                 fc->init_security = 1;
1288                         if (flags & FUSE_CREATE_SUPP_GROUP)
1289                                 fc->create_supp_group = 1;
1290                         if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
1291                                 fc->direct_io_allow_mmap = 1;
1292                 } else {
1293                         ra_pages = fc->max_read / PAGE_SIZE;
1294                         fc->no_lock = 1;
1295                         fc->no_flock = 1;
1296                 }
1297
1298                 fm->sb->s_bdi->ra_pages =
1299                                 min(fm->sb->s_bdi->ra_pages, ra_pages);
1300                 fc->minor = arg->minor;
1301                 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
1302                 fc->max_write = max_t(unsigned, 4096, fc->max_write);
1303                 fc->conn_init = 1;
1304         }
1305         kfree(ia);
1306
1307         if (!ok) {
1308                 fc->conn_init = 0;
1309                 fc->conn_error = 1;
1310         }
1311
1312         fuse_set_initialized(fc);
1313         wake_up_all(&fc->blocked_waitq);
1314 }
1315
1316 void fuse_send_init(struct fuse_mount *fm)
1317 {
1318         struct fuse_init_args *ia;
1319         u64 flags;
1320
1321         ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);
1322
1323         ia->in.major = FUSE_KERNEL_VERSION;
1324         ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
1325         ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
1326         flags =
1327                 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
1328                 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
1329                 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1330                 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1331                 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1332                 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1333                 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1334                 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1335                 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
1336                 FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
1337                 FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
1338                 FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP;
1339 #ifdef CONFIG_FUSE_DAX
1340         if (fm->fc->dax)
1341                 flags |= FUSE_MAP_ALIGNMENT;
1342         if (fuse_is_inode_dax_mode(fm->fc->dax_mode))
1343                 flags |= FUSE_HAS_INODE_DAX;
1344 #endif
1345         if (fm->fc->auto_submounts)
1346                 flags |= FUSE_SUBMOUNTS;
1347
1348         ia->in.flags = flags;
1349         ia->in.flags2 = flags >> 32;
1350
1351         ia->args.opcode = FUSE_INIT;
1352         ia->args.in_numargs = 1;
1353         ia->args.in_args[0].size = sizeof(ia->in);
1354         ia->args.in_args[0].value = &ia->in;
1355         ia->args.out_numargs = 1;
1356         /* Variable length argument used for backward compatibility
1357            with interface version < 7.5.  Rest of init_out is zeroed
1358            by do_get_request(), so a short reply is not a problem */
1359         ia->args.out_argvar = true;
1360         ia->args.out_args[0].size = sizeof(ia->out);
1361         ia->args.out_args[0].value = &ia->out;
1362         ia->args.force = true;
1363         ia->args.nocreds = true;
1364         ia->args.end = process_init_reply;
1365
1366         if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
1367                 process_init_reply(fm, &ia->args, -ENOTCONN);
1368 }
1369 EXPORT_SYMBOL_GPL(fuse_send_init);
1370
1371 void fuse_free_conn(struct fuse_conn *fc)
1372 {
1373         WARN_ON(!list_empty(&fc->devices));
1374         kfree_rcu(fc, rcu);
1375 }
1376 EXPORT_SYMBOL_GPL(fuse_free_conn);
1377
1378 static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
1379 {
1380         int err;
1381         char *suffix = "";
1382
1383         if (sb->s_bdev) {
1384                 suffix = "-fuseblk";
1385                 /*
1386                  * sb->s_bdi points to blkdev's bdi however we want to redirect
1387                  * it to our private bdi...
1388                  */
1389                 bdi_put(sb->s_bdi);
1390                 sb->s_bdi = &noop_backing_dev_info;
1391         }
1392         err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
1393                                    MINOR(fc->dev), suffix);
1394         if (err)
1395                 return err;
1396
1397         /* fuse does it's own writeback accounting */
1398         sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
1399         sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
1400
1401         /*
1402          * For a single fuse filesystem use max 1% of dirty +
1403          * writeback threshold.
1404          *
1405          * This gives about 1M of write buffer for memory maps on a
1406          * machine with 1G and 10% dirty_ratio, which should be more
1407          * than enough.
1408          *
1409          * Privileged users can raise it by writing to
1410          *
1411          *    /sys/class/bdi/<bdi>/max_ratio
1412          */
1413         bdi_set_max_ratio(sb->s_bdi, 1);
1414
1415         return 0;
1416 }
1417
1418 struct fuse_dev *fuse_dev_alloc(void)
1419 {
1420         struct fuse_dev *fud;
1421         struct list_head *pq;
1422
1423         fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1424         if (!fud)
1425                 return NULL;
1426
1427         pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
1428         if (!pq) {
1429                 kfree(fud);
1430                 return NULL;
1431         }
1432
1433         fud->pq.processing = pq;
1434         fuse_pqueue_init(&fud->pq);
1435
1436         return fud;
1437 }
1438 EXPORT_SYMBOL_GPL(fuse_dev_alloc);
1439
1440 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
1441 {
1442         fud->fc = fuse_conn_get(fc);
1443         spin_lock(&fc->lock);
1444         list_add_tail(&fud->entry, &fc->devices);
1445         spin_unlock(&fc->lock);
1446 }
1447 EXPORT_SYMBOL_GPL(fuse_dev_install);
1448
/*
 * Convenience wrapper: allocate a device with fuse_dev_alloc() and bind it
 * to @fc with fuse_dev_install().  Returns NULL if the allocation fails.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1461
1462 void fuse_dev_free(struct fuse_dev *fud)
1463 {
1464         struct fuse_conn *fc = fud->fc;
1465
1466         if (fc) {
1467                 spin_lock(&fc->lock);
1468                 list_del(&fud->entry);
1469                 spin_unlock(&fc->lock);
1470
1471                 fuse_conn_put(fc);
1472         }
1473         kfree(fud->pq.processing);
1474         kfree(fud);
1475 }
1476 EXPORT_SYMBOL_GPL(fuse_dev_free);
1477
1478 static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
1479                                       const struct fuse_inode *fi)
1480 {
1481         struct timespec64 ctime = inode_get_ctime(&fi->inode);
1482
1483         *attr = (struct fuse_attr){
1484                 .ino            = fi->inode.i_ino,
1485                 .size           = fi->inode.i_size,
1486                 .blocks         = fi->inode.i_blocks,
1487                 .atime          = fi->inode.i_atime.tv_sec,
1488                 .mtime          = fi->inode.i_mtime.tv_sec,
1489                 .ctime          = ctime.tv_sec,
1490                 .atimensec      = fi->inode.i_atime.tv_nsec,
1491                 .mtimensec      = fi->inode.i_mtime.tv_nsec,
1492                 .ctimensec      = ctime.tv_nsec,
1493                 .mode           = fi->inode.i_mode,
1494                 .nlink          = fi->inode.i_nlink,
1495                 .uid            = fi->inode.i_uid.val,
1496                 .gid            = fi->inode.i_gid.val,
1497                 .rdev           = fi->inode.i_rdev,
1498                 .blksize        = 1u << fi->inode.i_blkbits,
1499         };
1500 }
1501
1502 static void fuse_sb_defaults(struct super_block *sb)
1503 {
1504         sb->s_magic = FUSE_SUPER_MAGIC;
1505         sb->s_op = &fuse_super_operations;
1506         sb->s_xattr = fuse_xattr_handlers;
1507         sb->s_maxbytes = MAX_LFS_FILESIZE;
1508         sb->s_time_gran = 1;
1509         sb->s_export_op = &fuse_export_operations;
1510         sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
1511         if (sb->s_user_ns != &init_user_ns)
1512                 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
1513         sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
1514 }
1515
1516 static int fuse_fill_super_submount(struct super_block *sb,
1517                                     struct fuse_inode *parent_fi)
1518 {
1519         struct fuse_mount *fm = get_fuse_mount_super(sb);
1520         struct super_block *parent_sb = parent_fi->inode.i_sb;
1521         struct fuse_attr root_attr;
1522         struct inode *root;
1523         struct fuse_submount_lookup *sl;
1524         struct fuse_inode *fi;
1525
1526         fuse_sb_defaults(sb);
1527         fm->sb = sb;
1528
1529         WARN_ON(sb->s_bdi != &noop_backing_dev_info);
1530         sb->s_bdi = bdi_get(parent_sb->s_bdi);
1531
1532         sb->s_xattr = parent_sb->s_xattr;
1533         sb->s_time_gran = parent_sb->s_time_gran;
1534         sb->s_blocksize = parent_sb->s_blocksize;
1535         sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
1536         sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
1537         if (parent_sb->s_subtype && !sb->s_subtype)
1538                 return -ENOMEM;
1539
1540         fuse_fill_attr_from_inode(&root_attr, parent_fi);
1541         root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
1542         /*
1543          * This inode is just a duplicate, so it is not looked up and
1544          * its nlookup should not be incremented.  fuse_iget() does
1545          * that, though, so undo it here.
1546          */
1547         fi = get_fuse_inode(root);
1548         fi->nlookup--;
1549
1550         sb->s_d_op = &fuse_dentry_operations;
1551         sb->s_root = d_make_root(root);
1552         if (!sb->s_root)
1553                 return -ENOMEM;
1554
1555         /*
1556          * Grab the parent's submount_lookup pointer and take a
1557          * reference on the shared nlookup from the parent.  This is to
1558          * prevent the last forget for this nodeid from getting
1559          * triggered until all users have finished with it.
1560          */
1561         sl = parent_fi->submount_lookup;
1562         WARN_ON(!sl);
1563         if (sl) {
1564                 refcount_inc(&sl->count);
1565                 fi->submount_lookup = sl;
1566         }
1567
1568         return 0;
1569 }
1570
1571 /* Filesystem context private data holds the FUSE inode of the mount point */
1572 static int fuse_get_tree_submount(struct fs_context *fsc)
1573 {
1574         struct fuse_mount *fm;
1575         struct fuse_inode *mp_fi = fsc->fs_private;
1576         struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
1577         struct super_block *sb;
1578         int err;
1579
1580         fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
1581         if (!fm)
1582                 return -ENOMEM;
1583
1584         fm->fc = fuse_conn_get(fc);
1585         fsc->s_fs_info = fm;
1586         sb = sget_fc(fsc, NULL, set_anon_super_fc);
1587         if (fsc->s_fs_info)
1588                 fuse_mount_destroy(fm);
1589         if (IS_ERR(sb))
1590                 return PTR_ERR(sb);
1591
1592         /* Initialize superblock, making @mp_fi its root */
1593         err = fuse_fill_super_submount(sb, mp_fi);
1594         if (err) {
1595                 deactivate_locked_super(sb);
1596                 return err;
1597         }
1598
1599         down_write(&fc->killsb);
1600         list_add_tail(&fm->fc_entry, &fc->mounts);
1601         up_write(&fc->killsb);
1602
1603         sb->s_flags |= SB_ACTIVE;
1604         fsc->root = dget(sb->s_root);
1605
1606         return 0;
1607 }
1608
1609 static const struct fs_context_operations fuse_context_submount_ops = {
1610         .get_tree       = fuse_get_tree_submount,
1611 };
1612
1613 int fuse_init_fs_context_submount(struct fs_context *fsc)
1614 {
1615         fsc->ops = &fuse_context_submount_ops;
1616         return 0;
1617 }
1618 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
1619
1620 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
1621 {
1622         struct fuse_dev *fud = NULL;
1623         struct fuse_mount *fm = get_fuse_mount_super(sb);
1624         struct fuse_conn *fc = fm->fc;
1625         struct inode *root;
1626         struct dentry *root_dentry;
1627         int err;
1628
1629         err = -EINVAL;
1630         if (sb->s_flags & SB_MANDLOCK)
1631                 goto err;
1632
1633         rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
1634         fuse_sb_defaults(sb);
1635
1636         if (ctx->is_bdev) {
1637 #ifdef CONFIG_BLOCK
1638                 err = -EINVAL;
1639                 if (!sb_set_blocksize(sb, ctx->blksize))
1640                         goto err;
1641 #endif
1642         } else {
1643                 sb->s_blocksize = PAGE_SIZE;
1644                 sb->s_blocksize_bits = PAGE_SHIFT;
1645         }
1646
1647         sb->s_subtype = ctx->subtype;
1648         ctx->subtype = NULL;
1649         if (IS_ENABLED(CONFIG_FUSE_DAX)) {
1650                 err = fuse_dax_conn_alloc(fc, ctx->dax_mode, ctx->dax_dev);
1651                 if (err)
1652                         goto err;
1653         }
1654
1655         if (ctx->fudptr) {
1656                 err = -ENOMEM;
1657                 fud = fuse_dev_alloc_install(fc);
1658                 if (!fud)
1659                         goto err_free_dax;
1660         }
1661
1662         fc->dev = sb->s_dev;
1663         fm->sb = sb;
1664         err = fuse_bdi_init(fc, sb);
1665         if (err)
1666                 goto err_dev_free;
1667
1668         /* Handle umasking inside the fuse code */
1669         if (sb->s_flags & SB_POSIXACL)
1670                 fc->dont_mask = 1;
1671         sb->s_flags |= SB_POSIXACL;
1672
1673         fc->default_permissions = ctx->default_permissions;
1674         fc->allow_other = ctx->allow_other;
1675         fc->user_id = ctx->user_id;
1676         fc->group_id = ctx->group_id;
1677         fc->legacy_opts_show = ctx->legacy_opts_show;
1678         fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1679         fc->destroy = ctx->destroy;
1680         fc->no_control = ctx->no_control;
1681         fc->no_force_umount = ctx->no_force_umount;
1682
1683         err = -ENOMEM;
1684         root = fuse_get_root_inode(sb, ctx->rootmode);
1685         sb->s_d_op = &fuse_root_dentry_operations;
1686         root_dentry = d_make_root(root);
1687         if (!root_dentry)
1688                 goto err_dev_free;
1689         /* Root dentry doesn't have .d_revalidate */
1690         sb->s_d_op = &fuse_dentry_operations;
1691
1692         mutex_lock(&fuse_mutex);
1693         err = -EINVAL;
1694         if (ctx->fudptr && *ctx->fudptr)
1695                 goto err_unlock;
1696
1697         err = fuse_ctl_add_conn(fc);
1698         if (err)
1699                 goto err_unlock;
1700
1701         list_add_tail(&fc->entry, &fuse_conn_list);
1702         sb->s_root = root_dentry;
1703         if (ctx->fudptr)
1704                 *ctx->fudptr = fud;
1705         mutex_unlock(&fuse_mutex);
1706         return 0;
1707
1708  err_unlock:
1709         mutex_unlock(&fuse_mutex);
1710         dput(root_dentry);
1711  err_dev_free:
1712         if (fud)
1713                 fuse_dev_free(fud);
1714  err_free_dax:
1715         if (IS_ENABLED(CONFIG_FUSE_DAX))
1716                 fuse_dax_conn_free(fc);
1717  err:
1718         return err;
1719 }
1720 EXPORT_SYMBOL_GPL(fuse_fill_super_common);
1721
1722 static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
1723 {
1724         struct fuse_fs_context *ctx = fsc->fs_private;
1725         int err;
1726
1727         if (!ctx->file || !ctx->rootmode_present ||
1728             !ctx->user_id_present || !ctx->group_id_present)
1729                 return -EINVAL;
1730
1731         /*
1732          * Require mount to happen from the same user namespace which
1733          * opened /dev/fuse to prevent potential attacks.
1734          */
1735         if ((ctx->file->f_op != &fuse_dev_operations) ||
1736             (ctx->file->f_cred->user_ns != sb->s_user_ns))
1737                 return -EINVAL;
1738         ctx->fudptr = &ctx->file->private_data;
1739
1740         err = fuse_fill_super_common(sb, ctx);
1741         if (err)
1742                 return err;
1743         /* file->private_data shall be visible on all CPUs after this */
1744         smp_mb();
1745         fuse_send_init(get_fuse_mount_super(sb));
1746         return 0;
1747 }
1748
/*
 * "Set" callback for the path where the user supplied an already initialized
 * fuse dev.  A new super must never be created in that case: if the old one
 * is gone, fail unconditionally with -ENOTCONN.  Both arguments are unused.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
	return -ENOTCONN;
}
1757
1758 static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
1759 {
1760
1761         return fsc->sget_key == get_fuse_conn_super(sb);
1762 }
1763
1764 static int fuse_get_tree(struct fs_context *fsc)
1765 {
1766         struct fuse_fs_context *ctx = fsc->fs_private;
1767         struct fuse_dev *fud;
1768         struct fuse_conn *fc;
1769         struct fuse_mount *fm;
1770         struct super_block *sb;
1771         int err;
1772
1773         fc = kmalloc(sizeof(*fc), GFP_KERNEL);
1774         if (!fc)
1775                 return -ENOMEM;
1776
1777         fm = kzalloc(sizeof(*fm), GFP_KERNEL);
1778         if (!fm) {
1779                 kfree(fc);
1780                 return -ENOMEM;
1781         }
1782
1783         fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL);
1784         fc->release = fuse_free_conn;
1785
1786         fsc->s_fs_info = fm;
1787
1788         if (ctx->fd_present)
1789                 ctx->file = fget(ctx->fd);
1790
1791         if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
1792                 err = get_tree_bdev(fsc, fuse_fill_super);
1793                 goto out;
1794         }
1795         /*
1796          * While block dev mount can be initialized with a dummy device fd
1797          * (found by device name), normal fuse mounts can't
1798          */
1799         err = -EINVAL;
1800         if (!ctx->file)
1801                 goto out;
1802
1803         /*
1804          * Allow creating a fuse mount with an already initialized fuse
1805          * connection
1806          */
1807         fud = READ_ONCE(ctx->file->private_data);
1808         if (ctx->file->f_op == &fuse_dev_operations && fud) {
1809                 fsc->sget_key = fud->fc;
1810                 sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
1811                 err = PTR_ERR_OR_ZERO(sb);
1812                 if (!IS_ERR(sb))
1813                         fsc->root = dget(sb->s_root);
1814         } else {
1815                 err = get_tree_nodev(fsc, fuse_fill_super);
1816         }
1817 out:
1818         if (fsc->s_fs_info)
1819                 fuse_mount_destroy(fm);
1820         if (ctx->file)
1821                 fput(ctx->file);
1822         return err;
1823 }
1824
1825 static const struct fs_context_operations fuse_context_ops = {
1826         .free           = fuse_free_fsc,
1827         .parse_param    = fuse_parse_param,
1828         .reconfigure    = fuse_reconfigure,
1829         .get_tree       = fuse_get_tree,
1830 };
1831
1832 /*
1833  * Set up the filesystem mount context.
1834  */
1835 static int fuse_init_fs_context(struct fs_context *fsc)
1836 {
1837         struct fuse_fs_context *ctx;
1838
1839         ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
1840         if (!ctx)
1841                 return -ENOMEM;
1842
1843         ctx->max_read = ~0;
1844         ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1845         ctx->legacy_opts_show = true;
1846
1847 #ifdef CONFIG_BLOCK
1848         if (fsc->fs_type == &fuseblk_fs_type) {
1849                 ctx->is_bdev = true;
1850                 ctx->destroy = true;
1851         }
1852 #endif
1853
1854         fsc->fs_private = ctx;
1855         fsc->ops = &fuse_context_ops;
1856         return 0;
1857 }
1858
1859 bool fuse_mount_remove(struct fuse_mount *fm)
1860 {
1861         struct fuse_conn *fc = fm->fc;
1862         bool last = false;
1863
1864         down_write(&fc->killsb);
1865         list_del_init(&fm->fc_entry);
1866         if (list_empty(&fc->mounts))
1867                 last = true;
1868         up_write(&fc->killsb);
1869
1870         return last;
1871 }
1872 EXPORT_SYMBOL_GPL(fuse_mount_remove);
1873
1874 void fuse_conn_destroy(struct fuse_mount *fm)
1875 {
1876         struct fuse_conn *fc = fm->fc;
1877
1878         if (fc->destroy)
1879                 fuse_send_destroy(fm);
1880
1881         fuse_abort_conn(fc);
1882         fuse_wait_aborted(fc);
1883
1884         if (!list_empty(&fc->entry)) {
1885                 mutex_lock(&fuse_mutex);
1886                 list_del(&fc->entry);
1887                 fuse_ctl_remove_conn(fc);
1888                 mutex_unlock(&fuse_mutex);
1889         }
1890 }
1891 EXPORT_SYMBOL_GPL(fuse_conn_destroy);
1892
1893 static void fuse_sb_destroy(struct super_block *sb)
1894 {
1895         struct fuse_mount *fm = get_fuse_mount_super(sb);
1896         bool last;
1897
1898         if (sb->s_root) {
1899                 last = fuse_mount_remove(fm);
1900                 if (last)
1901                         fuse_conn_destroy(fm);
1902         }
1903 }
1904
1905 void fuse_mount_destroy(struct fuse_mount *fm)
1906 {
1907         fuse_conn_put(fm->fc);
1908         kfree(fm);
1909 }
1910 EXPORT_SYMBOL(fuse_mount_destroy);
1911
/*
 * ->kill_sb() for the "fuse" filesystem type: detach the mount from its
 * connection, kill the anonymous superblock, and only then look up and
 * release the fuse_mount attached to @sb.
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	struct fuse_mount *fm;

	fuse_sb_destroy(sb);
	kill_anon_super(sb);

	fm = get_fuse_mount_super(sb);
	fuse_mount_destroy(fm);
}
1918
1919 static struct file_system_type fuse_fs_type = {
1920         .owner          = THIS_MODULE,
1921         .name           = "fuse",
1922         .fs_flags       = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1923         .init_fs_context = fuse_init_fs_context,
1924         .parameters     = fuse_fs_parameters,
1925         .kill_sb        = fuse_kill_sb_anon,
1926 };
1927 MODULE_ALIAS_FS("fuse");
1928
1929 #ifdef CONFIG_BLOCK
/*
 * ->kill_sb() for the "fuseblk" filesystem type: detach the mount from its
 * connection, kill the block superblock, and only then look up and release
 * the fuse_mount attached to @sb.
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	struct fuse_mount *fm;

	fuse_sb_destroy(sb);
	kill_block_super(sb);

	fm = get_fuse_mount_super(sb);
	fuse_mount_destroy(fm);
}
1936
1937 static struct file_system_type fuseblk_fs_type = {
1938         .owner          = THIS_MODULE,
1939         .name           = "fuseblk",
1940         .init_fs_context = fuse_init_fs_context,
1941         .parameters     = fuse_fs_parameters,
1942         .kill_sb        = fuse_kill_sb_blk,
1943         .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1944 };
1945 MODULE_ALIAS_FS("fuseblk");
1946
1947 static inline int register_fuseblk(void)
1948 {
1949         return register_filesystem(&fuseblk_fs_type);
1950 }
1951
1952 static inline void unregister_fuseblk(void)
1953 {
1954         unregister_filesystem(&fuseblk_fs_type);
1955 }
1956 #else
/*
 * Stub used when CONFIG_BLOCK is not defined: there is no "fuseblk" type to
 * register, so always report success.
 */
static inline int register_fuseblk(void)
{
	return 0;
}
1961
/* Stub used when CONFIG_BLOCK is not defined: nothing to unregister. */
static inline void unregister_fuseblk(void)
{
}
1965 #endif
1966
/*
 * Constructor handed to kmem_cache_create() for the fuse inode cache.
 * @foo is passed to inode_init_once() as the inode to initialise.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once(foo);
}
1973
1974 static int __init fuse_fs_init(void)
1975 {
1976         int err;
1977
1978         fuse_inode_cachep = kmem_cache_create("fuse_inode",
1979                         sizeof(struct fuse_inode), 0,
1980                         SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
1981                         fuse_inode_init_once);
1982         err = -ENOMEM;
1983         if (!fuse_inode_cachep)
1984                 goto out;
1985
1986         err = register_fuseblk();
1987         if (err)
1988                 goto out2;
1989
1990         err = register_filesystem(&fuse_fs_type);
1991         if (err)
1992                 goto out3;
1993
1994         return 0;
1995
1996  out3:
1997         unregister_fuseblk();
1998  out2:
1999         kmem_cache_destroy(fuse_inode_cachep);
2000  out:
2001         return err;
2002 }
2003
2004 static void fuse_fs_cleanup(void)
2005 {
2006         unregister_filesystem(&fuse_fs_type);
2007         unregister_fuseblk();
2008
2009         /*
2010          * Make sure all delayed rcu free inodes are flushed before we
2011          * destroy cache.
2012          */
2013         rcu_barrier();
2014         kmem_cache_destroy(fuse_inode_cachep);
2015 }
2016
2017 static struct kobject *fuse_kobj;
2018
2019 static int fuse_sysfs_init(void)
2020 {
2021         int err;
2022
2023         fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
2024         if (!fuse_kobj) {
2025                 err = -ENOMEM;
2026                 goto out_err;
2027         }
2028
2029         err = sysfs_create_mount_point(fuse_kobj, "connections");
2030         if (err)
2031                 goto out_fuse_unregister;
2032
2033         return 0;
2034
2035  out_fuse_unregister:
2036         kobject_put(fuse_kobj);
2037  out_err:
2038         return err;
2039 }
2040
2041 static void fuse_sysfs_cleanup(void)
2042 {
2043         sysfs_remove_mount_point(fuse_kobj, "connections");
2044         kobject_put(fuse_kobj);
2045 }
2046
2047 static int __init fuse_init(void)
2048 {
2049         int res;
2050
2051         pr_info("init (API version %i.%i)\n",
2052                 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
2053
2054         INIT_LIST_HEAD(&fuse_conn_list);
2055         res = fuse_fs_init();
2056         if (res)
2057                 goto err;
2058
2059         res = fuse_dev_init();
2060         if (res)
2061                 goto err_fs_cleanup;
2062
2063         res = fuse_sysfs_init();
2064         if (res)
2065                 goto err_dev_cleanup;
2066
2067         res = fuse_ctl_init();
2068         if (res)
2069                 goto err_sysfs_cleanup;
2070
2071         sanitize_global_limit(&max_user_bgreq);
2072         sanitize_global_limit(&max_user_congthresh);
2073
2074         return 0;
2075
2076  err_sysfs_cleanup:
2077         fuse_sysfs_cleanup();
2078  err_dev_cleanup:
2079         fuse_dev_cleanup();
2080  err_fs_cleanup:
2081         fuse_fs_cleanup();
2082  err:
2083         return res;
2084 }
2085
2086 static void __exit fuse_exit(void)
2087 {
2088         pr_debug("exit\n");
2089
2090         fuse_ctl_cleanup();
2091         fuse_sysfs_cleanup();
2092         fuse_fs_cleanup();
2093         fuse_dev_cleanup();
2094 }
2095
2096 module_init(fuse_init);
2097 module_exit(fuse_exit);