GNU Linux-libre 6.8.9-gnu
[releases.git] / fs / bcachefs / fs.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3
4 #include "bcachefs.h"
5 #include "acl.h"
6 #include "bkey_buf.h"
7 #include "btree_update.h"
8 #include "buckets.h"
9 #include "chardev.h"
10 #include "dirent.h"
11 #include "errcode.h"
12 #include "extents.h"
13 #include "fs.h"
14 #include "fs-common.h"
15 #include "fs-io.h"
16 #include "fs-ioctl.h"
17 #include "fs-io-buffered.h"
18 #include "fs-io-direct.h"
19 #include "fs-io-pagecache.h"
20 #include "fsck.h"
21 #include "inode.h"
22 #include "io_read.h"
23 #include "journal.h"
24 #include "keylist.h"
25 #include "quota.h"
26 #include "snapshot.h"
27 #include "super.h"
28 #include "xattr.h"
29
30 #include <linux/aio.h>
31 #include <linux/backing-dev.h>
32 #include <linux/exportfs.h>
33 #include <linux/fiemap.h>
34 #include <linux/module.h>
35 #include <linux/pagemap.h>
36 #include <linux/posix_acl.h>
37 #include <linux/random.h>
38 #include <linux/seq_file.h>
39 #include <linux/statfs.h>
40 #include <linux/string.h>
41 #include <linux/xattr.h>
42
43 static struct kmem_cache *bch2_inode_cache;
44
45 static void bch2_vfs_inode_init(struct btree_trans *, subvol_inum,
46                                 struct bch_inode_info *,
47                                 struct bch_inode_unpacked *,
48                                 struct bch_subvolume *);
49
50 void bch2_inode_update_after_write(struct btree_trans *trans,
51                                    struct bch_inode_info *inode,
52                                    struct bch_inode_unpacked *bi,
53                                    unsigned fields)
54 {
55         struct bch_fs *c = trans->c;
56
57         BUG_ON(bi->bi_inum != inode->v.i_ino);
58
59         bch2_assert_pos_locked(trans, BTREE_ID_inodes,
60                                POS(0, bi->bi_inum),
61                                c->opts.inodes_use_key_cache);
62
63         set_nlink(&inode->v, bch2_inode_nlink_get(bi));
64         i_uid_write(&inode->v, bi->bi_uid);
65         i_gid_write(&inode->v, bi->bi_gid);
66         inode->v.i_mode = bi->bi_mode;
67
68         if (fields & ATTR_ATIME)
69                 inode_set_atime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_atime));
70         if (fields & ATTR_MTIME)
71                 inode_set_mtime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_mtime));
72         if (fields & ATTR_CTIME)
73                 inode_set_ctime_to_ts(&inode->v, bch2_time_to_timespec(c, bi->bi_ctime));
74
75         inode->ei_inode         = *bi;
76
77         bch2_inode_flags_to_vfs(inode);
78 }
79
80 int __must_check bch2_write_inode(struct bch_fs *c,
81                                   struct bch_inode_info *inode,
82                                   inode_set_fn set,
83                                   void *p, unsigned fields)
84 {
85         struct btree_trans *trans = bch2_trans_get(c);
86         struct btree_iter iter = { NULL };
87         struct bch_inode_unpacked inode_u;
88         int ret;
89 retry:
90         bch2_trans_begin(trans);
91
92         ret   = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode),
93                                 BTREE_ITER_INTENT) ?:
94                 (set ? set(trans, inode, &inode_u, p) : 0) ?:
95                 bch2_inode_write(trans, &iter, &inode_u) ?:
96                 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
97
98         /*
99          * the btree node lock protects inode->ei_inode, not ei_update_lock;
100          * this is important for inode updates via bchfs_write_index_update
101          */
102         if (!ret)
103                 bch2_inode_update_after_write(trans, inode, &inode_u, fields);
104
105         bch2_trans_iter_exit(trans, &iter);
106
107         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
108                 goto retry;
109
110         bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
111                              "inode %u:%llu not found when updating",
112                              inode_inum(inode).subvol,
113                              inode_inum(inode).inum);
114
115         bch2_trans_put(trans);
116         return ret < 0 ? ret : 0;
117 }
118
119 int bch2_fs_quota_transfer(struct bch_fs *c,
120                            struct bch_inode_info *inode,
121                            struct bch_qid new_qid,
122                            unsigned qtypes,
123                            enum quota_acct_mode mode)
124 {
125         unsigned i;
126         int ret;
127
128         qtypes &= enabled_qtypes(c);
129
130         for (i = 0; i < QTYP_NR; i++)
131                 if (new_qid.q[i] == inode->ei_qid.q[i])
132                         qtypes &= ~(1U << i);
133
134         if (!qtypes)
135                 return 0;
136
137         mutex_lock(&inode->ei_quota_lock);
138
139         ret = bch2_quota_transfer(c, qtypes, new_qid,
140                                   inode->ei_qid,
141                                   inode->v.i_blocks +
142                                   inode->ei_quota_reserved,
143                                   mode);
144         if (!ret)
145                 for (i = 0; i < QTYP_NR; i++)
146                         if (qtypes & (1 << i))
147                                 inode->ei_qid.q[i] = new_qid.q[i];
148
149         mutex_unlock(&inode->ei_quota_lock);
150
151         return ret;
152 }
153
154 static int bch2_iget5_test(struct inode *vinode, void *p)
155 {
156         struct bch_inode_info *inode = to_bch_ei(vinode);
157         subvol_inum *inum = p;
158
159         return inode->ei_subvol == inum->subvol &&
160                 inode->ei_inode.bi_inum == inum->inum;
161 }
162
163 static int bch2_iget5_set(struct inode *vinode, void *p)
164 {
165         struct bch_inode_info *inode = to_bch_ei(vinode);
166         subvol_inum *inum = p;
167
168         inode->v.i_ino          = inum->inum;
169         inode->ei_subvol        = inum->subvol;
170         inode->ei_inode.bi_inum = inum->inum;
171         return 0;
172 }
173
174 static unsigned bch2_inode_hash(subvol_inum inum)
175 {
176         return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
177 }
178
179 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
180 {
181         struct bch_inode_unpacked inode_u;
182         struct bch_inode_info *inode;
183         struct btree_trans *trans;
184         struct bch_subvolume subvol;
185         int ret;
186
187         inode = to_bch_ei(iget5_locked(c->vfs_sb,
188                                        bch2_inode_hash(inum),
189                                        bch2_iget5_test,
190                                        bch2_iget5_set,
191                                        &inum));
192         if (unlikely(!inode))
193                 return ERR_PTR(-ENOMEM);
194         if (!(inode->v.i_state & I_NEW))
195                 return &inode->v;
196
197         trans = bch2_trans_get(c);
198         ret = lockrestart_do(trans,
199                 bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
200                 bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
201
202         if (!ret)
203                 bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
204         bch2_trans_put(trans);
205
206         if (ret) {
207                 iget_failed(&inode->v);
208                 return ERR_PTR(bch2_err_class(ret));
209         }
210
211         mutex_lock(&c->vfs_inodes_lock);
212         list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
213         mutex_unlock(&c->vfs_inodes_lock);
214
215         unlock_new_inode(&inode->v);
216
217         return &inode->v;
218 }
219
/*
 * Create a new inode in directory @dir and return its in-memory inode.
 *
 * @dentry supplies the name of the new directory entry; when @flags contains
 * BCH_CREATE_TMPFILE no name is passed to bch2_create_trans(), the
 * directory's ei_update_lock is not taken and the directory's in-memory
 * inode is not refreshed. @mode, @rdev, @snapshot_src and @flags are
 * forwarded to bch2_create_trans(). @idmap is not used in this function.
 *
 * One inode is charged to quota (Q_INO) before the commit and uncharged
 * again if the commit step fails.
 *
 * Returns the new inode (already inserted into the inode cache), or the
 * inode that another thread inserted first if we lost that race, or an
 * ERR_PTR() on failure.
 */
220 struct bch_inode_info *
221 __bch2_create(struct mnt_idmap *idmap,
222               struct bch_inode_info *dir, struct dentry *dentry,
223               umode_t mode, dev_t rdev, subvol_inum snapshot_src,
224               unsigned flags)
225 {
226         struct bch_fs *c = dir->v.i_sb->s_fs_info;
227         struct btree_trans *trans;
228         struct bch_inode_unpacked dir_u;
229         struct bch_inode_info *inode, *old;
230         struct bch_inode_unpacked inode_u;
231         struct posix_acl *default_acl = NULL, *acl = NULL;
232         subvol_inum inum;
233         struct bch_subvolume subvol;
234         u64 journal_seq = 0;
235         int ret;
236
237         /*
238          * preallocate acls + vfs inode before btree transaction, so that
239          * nothing can fail after the transaction succeeds:
240          */
241 #ifdef CONFIG_BCACHEFS_POSIX_ACL
242         ret = posix_acl_create(&dir->v, &mode, &default_acl, &acl);
243         if (ret)
244                 return ERR_PTR(ret);
245 #endif
246         inode = to_bch_ei(new_inode(c->vfs_sb));
247         if (unlikely(!inode)) {
248                 inode = ERR_PTR(-ENOMEM);
249                 goto err;
250         }
251
252         bch2_inode_init_early(c, &inode_u);
253
        /* tmpfiles get no directory entry, so the directory is left alone */
254         if (!(flags & BCH_CREATE_TMPFILE))
255                 mutex_lock(&dir->ei_update_lock);
256
257         trans = bch2_trans_get(c);
258 retry:
259         bch2_trans_begin(trans);
260
        /* refuse on a read-only subvolume, create, then charge one inode to quota */
261         ret   = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
262                 bch2_create_trans(trans,
263                                   inode_inum(dir), &dir_u, &inode_u,
264                                   !(flags & BCH_CREATE_TMPFILE)
265                                   ? &dentry->d_name : NULL,
266                                   from_kuid(i_user_ns(&dir->v), current_fsuid()),
267                                   from_kgid(i_user_ns(&dir->v), current_fsgid()),
268                                   mode, rdev,
269                                   default_acl, acl, snapshot_src, flags) ?:
270                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1,
271                                 KEY_TYPE_QUOTA_PREALLOC);
272         if (unlikely(ret))
273                 goto err_before_quota;
274
        /* the new inode's own subvolume if it has one, else the parent's */
275         inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;
276         inum.inum = inode_u.bi_inum;
277
278         ret   = bch2_subvolume_get(trans, inum.subvol, true,
279                                    BTREE_ITER_WITH_UPDATES, &subvol) ?:
280                 bch2_trans_commit(trans, NULL, &journal_seq, 0);
281         if (unlikely(ret)) {
                /* undo the quota charge taken above */
282                 bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
283                                 KEY_TYPE_QUOTA_WARN);
                /*
                 * Failures from the first step jump in here, skipping the
                 * quota undo above.
                 */
284 err_before_quota:
285                 if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
286                         goto retry;
287                 goto err_trans;
288         }
289
290         if (!(flags & BCH_CREATE_TMPFILE)) {
291                 bch2_inode_update_after_write(trans, dir, &dir_u,
292                                               ATTR_MTIME|ATTR_CTIME);
293                 mutex_unlock(&dir->ei_update_lock);
294         }
295
296         bch2_iget5_set(&inode->v, &inum);
297         bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
298
299         set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
300         set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
301
302         /*
303          * we must insert the new inode into the inode cache before calling
304          * bch2_trans_exit() and dropping locks, else we could race with another
305          * thread pulling the inode in and modifying it:
306          */
307
308         inode->v.i_state |= I_CREATING;
309
310         old = to_bch_ei(inode_insert5(&inode->v,
311                                       bch2_inode_hash(inum),
312                                       bch2_iget5_test,
313                                       bch2_iget5_set,
314                                       &inum));
315         BUG_ON(!old);
316
317         if (unlikely(old != inode)) {
318                 /*
319                  * We raced, another process pulled the new inode into cache
320                  * before us:
321                  */
322                 make_bad_inode(&inode->v);
323                 iput(&inode->v);
324
325                 inode = old;
326         } else {
327                 mutex_lock(&c->vfs_inodes_lock);
328                 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
329                 mutex_unlock(&c->vfs_inodes_lock);
330                 /*
331                  * we really don't want insert_inode_locked2() to be setting
332                  * I_NEW...
333                  */
334                 unlock_new_inode(&inode->v);
335         }
336
337         bch2_trans_put(trans);
        /* common exit: drop the local ACL references on success and failure alike */
338 err:
339         posix_acl_release(default_acl);
340         posix_acl_release(acl);
341         return inode;
        /* transaction failed: unlock the directory and discard the unused inode */
342 err_trans:
343         if (!(flags & BCH_CREATE_TMPFILE))
344                 mutex_unlock(&dir->ei_update_lock);
345
346         bch2_trans_put(trans);
347         make_bad_inode(&inode->v);
348         iput(&inode->v);
349         inode = ERR_PTR(ret);
350         goto err;
351 }
352
353 /* methods */
354
355 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
356                                   unsigned int flags)
357 {
358         struct bch_fs *c = vdir->i_sb->s_fs_info;
359         struct bch_inode_info *dir = to_bch_ei(vdir);
360         struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
361         struct inode *vinode = NULL;
362         subvol_inum inum = { .subvol = 1 };
363         int ret;
364
365         ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
366                                  &dentry->d_name, &inum);
367
368         if (!ret)
369                 vinode = bch2_vfs_inode_get(c, inum);
370
371         return d_splice_alias(vinode, dentry);
372 }
373
374 static int bch2_mknod(struct mnt_idmap *idmap,
375                       struct inode *vdir, struct dentry *dentry,
376                       umode_t mode, dev_t rdev)
377 {
378         struct bch_inode_info *inode =
379                 __bch2_create(idmap, to_bch_ei(vdir), dentry, mode, rdev,
380                               (subvol_inum) { 0 }, 0);
381
382         if (IS_ERR(inode))
383                 return bch2_err_class(PTR_ERR(inode));
384
385         d_instantiate(dentry, &inode->v);
386         return 0;
387 }
388
389 static int bch2_create(struct mnt_idmap *idmap,
390                        struct inode *vdir, struct dentry *dentry,
391                        umode_t mode, bool excl)
392 {
393         return bch2_mknod(idmap, vdir, dentry, mode|S_IFREG, 0);
394 }
395
396 static int __bch2_link(struct bch_fs *c,
397                        struct bch_inode_info *inode,
398                        struct bch_inode_info *dir,
399                        struct dentry *dentry)
400 {
401         struct btree_trans *trans = bch2_trans_get(c);
402         struct bch_inode_unpacked dir_u, inode_u;
403         int ret;
404
405         mutex_lock(&inode->ei_update_lock);
406
407         ret = commit_do(trans, NULL, NULL, 0,
408                         bch2_link_trans(trans,
409                                         inode_inum(dir),   &dir_u,
410                                         inode_inum(inode), &inode_u,
411                                         &dentry->d_name));
412
413         if (likely(!ret)) {
414                 bch2_inode_update_after_write(trans, dir, &dir_u,
415                                               ATTR_MTIME|ATTR_CTIME);
416                 bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME);
417         }
418
419         bch2_trans_put(trans);
420         mutex_unlock(&inode->ei_update_lock);
421         return ret;
422 }
423
424 static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
425                      struct dentry *dentry)
426 {
427         struct bch_fs *c = vdir->i_sb->s_fs_info;
428         struct bch_inode_info *dir = to_bch_ei(vdir);
429         struct bch_inode_info *inode = to_bch_ei(old_dentry->d_inode);
430         int ret;
431
432         lockdep_assert_held(&inode->v.i_rwsem);
433
434         ret   = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
435                 bch2_subvol_is_ro(c, inode->ei_subvol) ?:
436                 __bch2_link(c, inode, dir, dentry);
437         if (unlikely(ret))
438                 return bch2_err_class(ret);
439
440         ihold(&inode->v);
441         d_instantiate(dentry, &inode->v);
442         return 0;
443 }
444
445 int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
446                   bool deleting_snapshot)
447 {
448         struct bch_fs *c = vdir->i_sb->s_fs_info;
449         struct bch_inode_info *dir = to_bch_ei(vdir);
450         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
451         struct bch_inode_unpacked dir_u, inode_u;
452         struct btree_trans *trans = bch2_trans_get(c);
453         int ret;
454
455         bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
456
457         ret = commit_do(trans, NULL, NULL,
458                         BCH_TRANS_COMMIT_no_enospc,
459                 bch2_unlink_trans(trans,
460                                   inode_inum(dir), &dir_u,
461                                   &inode_u, &dentry->d_name,
462                                   deleting_snapshot));
463         if (unlikely(ret))
464                 goto err;
465
466         bch2_inode_update_after_write(trans, dir, &dir_u,
467                                       ATTR_MTIME|ATTR_CTIME);
468         bch2_inode_update_after_write(trans, inode, &inode_u,
469                                       ATTR_MTIME);
470
471         if (inode_u.bi_subvol) {
472                 /*
473                  * Subvolume deletion is asynchronous, but we still want to tell
474                  * the VFS that it's been deleted here:
475                  */
476                 set_nlink(&inode->v, 0);
477         }
478 err:
479         bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
480         bch2_trans_put(trans);
481
482         return ret;
483 }
484
485 static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
486 {
487         struct bch_inode_info *dir= to_bch_ei(vdir);
488         struct bch_fs *c = dir->v.i_sb->s_fs_info;
489
490         int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
491                 __bch2_unlink(vdir, dentry, false);
492         return bch2_err_class(ret);
493 }
494
495 static int bch2_symlink(struct mnt_idmap *idmap,
496                         struct inode *vdir, struct dentry *dentry,
497                         const char *symname)
498 {
499         struct bch_fs *c = vdir->i_sb->s_fs_info;
500         struct bch_inode_info *dir = to_bch_ei(vdir), *inode;
501         int ret;
502
503         inode = __bch2_create(idmap, dir, dentry, S_IFLNK|S_IRWXUGO, 0,
504                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
505         if (IS_ERR(inode))
506                 return bch2_err_class(PTR_ERR(inode));
507
508         inode_lock(&inode->v);
509         ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
510         inode_unlock(&inode->v);
511
512         if (unlikely(ret))
513                 goto err;
514
515         ret = filemap_write_and_wait_range(inode->v.i_mapping, 0, LLONG_MAX);
516         if (unlikely(ret))
517                 goto err;
518
519         ret = __bch2_link(c, inode, dir, dentry);
520         if (unlikely(ret))
521                 goto err;
522
523         d_instantiate(dentry, &inode->v);
524         return 0;
525 err:
526         iput(&inode->v);
527         return bch2_err_class(ret);
528 }
529
530 static int bch2_mkdir(struct mnt_idmap *idmap,
531                       struct inode *vdir, struct dentry *dentry, umode_t mode)
532 {
533         return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
534 }
535
/*
 * ->rename: move @src_dentry (in @src_vdir) to @dst_dentry (in @dst_vdir).
 *
 * The rename mode is derived from @flags and the destination: an exchange
 * if RENAME_EXCHANGE is set, an overwrite if the destination dentry already
 * has an inode, a plain rename otherwise. Flags other than RENAME_NOREPLACE
 * and RENAME_EXCHANGE are rejected with -EINVAL.
 *
 * All involved inodes are held under INODE_UPDATE_LOCK across the btree
 * transaction. Returns 0 or a negative error code.
 */
536 static int bch2_rename2(struct mnt_idmap *idmap,
537                         struct inode *src_vdir, struct dentry *src_dentry,
538                         struct inode *dst_vdir, struct dentry *dst_dentry,
539                         unsigned flags)
540 {
541         struct bch_fs *c = src_vdir->i_sb->s_fs_info;
542         struct bch_inode_info *src_dir = to_bch_ei(src_vdir);
543         struct bch_inode_info *dst_dir = to_bch_ei(dst_vdir);
544         struct bch_inode_info *src_inode = to_bch_ei(src_dentry->d_inode);
545         struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode);
546         struct bch_inode_unpacked dst_dir_u, src_dir_u;
547         struct bch_inode_unpacked src_inode_u, dst_inode_u;
548         struct btree_trans *trans;
549         enum bch_rename_mode mode = flags & RENAME_EXCHANGE
550                 ? BCH_RENAME_EXCHANGE
551                 : dst_dentry->d_inode
552                 ? BCH_RENAME_OVERWRITE : BCH_RENAME;
553         int ret;
554
555         if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
556                 return -EINVAL;
557
        /* when overwriting, write back the source's dirty pages first */
558         if (mode == BCH_RENAME_OVERWRITE) {
559                 ret = filemap_write_and_wait_range(src_inode->v.i_mapping,
560                                                    0, LLONG_MAX);
561                 if (ret)
562                         return ret;
563         }
564
565         trans = bch2_trans_get(c);
566
567         bch2_lock_inodes(INODE_UPDATE_LOCK,
568                          src_dir,
569                          dst_dir,
570                          src_inode,
571                          dst_inode);
572
573         ret   = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
574                 bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
575         if (ret)
576                 goto err;
577
        /*
         * If the project option would change for the moved inode, transfer
         * its project quota to the destination directory's id up front.
         */
578         if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
579                 ret = bch2_fs_quota_transfer(c, src_inode,
580                                              dst_dir->ei_qid,
581                                              1 << QTYP_PRJ,
582                                              KEY_TYPE_QUOTA_PREALLOC);
583                 if (ret)
584                         goto err;
585         }
586
        /* an exchange also moves the destination inode into the source directory */
587         if (mode == BCH_RENAME_EXCHANGE &&
588             inode_attr_changing(src_dir, dst_inode, Inode_opt_project)) {
589                 ret = bch2_fs_quota_transfer(c, dst_inode,
590                                              src_dir->ei_qid,
591                                              1 << QTYP_PRJ,
592                                              KEY_TYPE_QUOTA_PREALLOC);
593                 if (ret)
594                         goto err;
595         }
596
597         ret = commit_do(trans, NULL, NULL, 0,
598                         bch2_rename_trans(trans,
599                                           inode_inum(src_dir), &src_dir_u,
600                                           inode_inum(dst_dir), &dst_dir_u,
601                                           &src_inode_u,
602                                           &dst_inode_u,
603                                           &src_dentry->d_name,
604                                           &dst_dentry->d_name,
605                                           mode));
606         if (unlikely(ret))
607                 goto err;
608
609         BUG_ON(src_inode->v.i_ino != src_inode_u.bi_inum);
610         BUG_ON(dst_inode &&
611                dst_inode->v.i_ino != dst_inode_u.bi_inum);
612
        /* committed: refresh the in-memory copy of every inode that was written */
613         bch2_inode_update_after_write(trans, src_dir, &src_dir_u,
614                                       ATTR_MTIME|ATTR_CTIME);
615
616         if (src_dir != dst_dir)
617                 bch2_inode_update_after_write(trans, dst_dir, &dst_dir_u,
618                                               ATTR_MTIME|ATTR_CTIME);
619
620         bch2_inode_update_after_write(trans, src_inode, &src_inode_u,
621                                       ATTR_CTIME);
622
623         if (dst_inode)
624                 bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u,
625                                               ATTR_CTIME);
626 err:
627         bch2_trans_put(trans);
628
        /*
         * Reached on success and on failure alike: re-transfer project quota
         * to the ids recorded in each inode's cached ei_inode.
         * NOTE(review): on failure ei_inode was not refreshed, so this
         * presumably rolls back the up-front transfers above - confirm.
         */
629         bch2_fs_quota_transfer(c, src_inode,
630                                bch_qid(&src_inode->ei_inode),
631                                1 << QTYP_PRJ,
632                                KEY_TYPE_QUOTA_NOCHECK);
633         if (dst_inode)
634                 bch2_fs_quota_transfer(c, dst_inode,
635                                        bch_qid(&dst_inode->ei_inode),
636                                        1 << QTYP_PRJ,
637                                        KEY_TYPE_QUOTA_NOCHECK);
638
639         bch2_unlock_inodes(INODE_UPDATE_LOCK,
640                            src_dir,
641                            dst_dir,
642                            src_inode,
643                            dst_inode);
644
645         return bch2_err_class(ret);
646 }
647
648 static void bch2_setattr_copy(struct mnt_idmap *idmap,
649                               struct bch_inode_info *inode,
650                               struct bch_inode_unpacked *bi,
651                               struct iattr *attr)
652 {
653         struct bch_fs *c = inode->v.i_sb->s_fs_info;
654         unsigned int ia_valid = attr->ia_valid;
655
656         if (ia_valid & ATTR_UID)
657                 bi->bi_uid = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
658         if (ia_valid & ATTR_GID)
659                 bi->bi_gid = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
660
661         if (ia_valid & ATTR_SIZE)
662                 bi->bi_size = attr->ia_size;
663
664         if (ia_valid & ATTR_ATIME)
665                 bi->bi_atime = timespec_to_bch2_time(c, attr->ia_atime);
666         if (ia_valid & ATTR_MTIME)
667                 bi->bi_mtime = timespec_to_bch2_time(c, attr->ia_mtime);
668         if (ia_valid & ATTR_CTIME)
669                 bi->bi_ctime = timespec_to_bch2_time(c, attr->ia_ctime);
670
671         if (ia_valid & ATTR_MODE) {
672                 umode_t mode = attr->ia_mode;
673                 kgid_t gid = ia_valid & ATTR_GID
674                         ? attr->ia_gid
675                         : inode->v.i_gid;
676
677                 if (!in_group_p(gid) &&
678                     !capable_wrt_inode_uidgid(idmap, &inode->v, CAP_FSETID))
679                         mode &= ~S_ISGID;
680                 bi->bi_mode = mode;
681         }
682 }
683
684 int bch2_setattr_nonsize(struct mnt_idmap *idmap,
685                          struct bch_inode_info *inode,
686                          struct iattr *attr)
687 {
688         struct bch_fs *c = inode->v.i_sb->s_fs_info;
689         struct bch_qid qid;
690         struct btree_trans *trans;
691         struct btree_iter inode_iter = { NULL };
692         struct bch_inode_unpacked inode_u;
693         struct posix_acl *acl = NULL;
694         int ret;
695
696         mutex_lock(&inode->ei_update_lock);
697
698         qid = inode->ei_qid;
699
700         if (attr->ia_valid & ATTR_UID)
701                 qid.q[QTYP_USR] = from_kuid(i_user_ns(&inode->v), attr->ia_uid);
702
703         if (attr->ia_valid & ATTR_GID)
704                 qid.q[QTYP_GRP] = from_kgid(i_user_ns(&inode->v), attr->ia_gid);
705
706         ret = bch2_fs_quota_transfer(c, inode, qid, ~0,
707                                      KEY_TYPE_QUOTA_PREALLOC);
708         if (ret)
709                 goto err;
710
711         trans = bch2_trans_get(c);
712 retry:
713         bch2_trans_begin(trans);
714         kfree(acl);
715         acl = NULL;
716
717         ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
718                               BTREE_ITER_INTENT);
719         if (ret)
720                 goto btree_err;
721
722         bch2_setattr_copy(idmap, inode, &inode_u, attr);
723
724         if (attr->ia_valid & ATTR_MODE) {
725                 ret = bch2_acl_chmod(trans, inode_inum(inode), &inode_u,
726                                      inode_u.bi_mode, &acl);
727                 if (ret)
728                         goto btree_err;
729         }
730
731         ret =   bch2_inode_write(trans, &inode_iter, &inode_u) ?:
732                 bch2_trans_commit(trans, NULL, NULL,
733                                   BCH_TRANS_COMMIT_no_enospc);
734 btree_err:
735         bch2_trans_iter_exit(trans, &inode_iter);
736
737         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
738                 goto retry;
739         if (unlikely(ret))
740                 goto err_trans;
741
742         bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid);
743
744         if (acl)
745                 set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
746 err_trans:
747         bch2_trans_put(trans);
748 err:
749         mutex_unlock(&inode->ei_update_lock);
750
751         return bch2_err_class(ret);
752 }
753
754 static int bch2_getattr(struct mnt_idmap *idmap,
755                         const struct path *path, struct kstat *stat,
756                         u32 request_mask, unsigned query_flags)
757 {
758         struct bch_inode_info *inode = to_bch_ei(d_inode(path->dentry));
759         struct bch_fs *c = inode->v.i_sb->s_fs_info;
760
761         stat->dev       = inode->v.i_sb->s_dev;
762         stat->ino       = inode->v.i_ino;
763         stat->mode      = inode->v.i_mode;
764         stat->nlink     = inode->v.i_nlink;
765         stat->uid       = inode->v.i_uid;
766         stat->gid       = inode->v.i_gid;
767         stat->rdev      = inode->v.i_rdev;
768         stat->size      = i_size_read(&inode->v);
769         stat->atime     = inode_get_atime(&inode->v);
770         stat->mtime     = inode_get_mtime(&inode->v);
771         stat->ctime     = inode_get_ctime(&inode->v);
772         stat->blksize   = block_bytes(c);
773         stat->blocks    = inode->v.i_blocks;
774
775         if (request_mask & STATX_BTIME) {
776                 stat->result_mask |= STATX_BTIME;
777                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
778         }
779
780         if (inode->ei_inode.bi_flags & BCH_INODE_immutable)
781                 stat->attributes |= STATX_ATTR_IMMUTABLE;
782         stat->attributes_mask    |= STATX_ATTR_IMMUTABLE;
783
784         if (inode->ei_inode.bi_flags & BCH_INODE_append)
785                 stat->attributes |= STATX_ATTR_APPEND;
786         stat->attributes_mask    |= STATX_ATTR_APPEND;
787
788         if (inode->ei_inode.bi_flags & BCH_INODE_nodump)
789                 stat->attributes |= STATX_ATTR_NODUMP;
790         stat->attributes_mask    |= STATX_ATTR_NODUMP;
791
792         return 0;
793 }
794
795 static int bch2_setattr(struct mnt_idmap *idmap,
796                         struct dentry *dentry, struct iattr *iattr)
797 {
798         struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
799         struct bch_fs *c = inode->v.i_sb->s_fs_info;
800         int ret;
801
802         lockdep_assert_held(&inode->v.i_rwsem);
803
804         ret   = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
805                 setattr_prepare(idmap, dentry, iattr);
806         if (ret)
807                 return ret;
808
809         return iattr->ia_valid & ATTR_SIZE
810                 ? bchfs_truncate(idmap, inode, iattr)
811                 : bch2_setattr_nonsize(idmap, inode, iattr);
812 }
813
814 static int bch2_tmpfile(struct mnt_idmap *idmap,
815                         struct inode *vdir, struct file *file, umode_t mode)
816 {
817         struct bch_inode_info *inode =
818                 __bch2_create(idmap, to_bch_ei(vdir),
819                               file->f_path.dentry, mode, 0,
820                               (subvol_inum) { 0 }, BCH_CREATE_TMPFILE);
821
822         if (IS_ERR(inode))
823                 return bch2_err_class(PTR_ERR(inode));
824
825         d_mark_tmpfile(file, &inode->v);
826         d_instantiate(file->f_path.dentry, &inode->v);
827         return finish_open_simple(file, 0);
828 }
829
830 static int bch2_fill_extent(struct bch_fs *c,
831                             struct fiemap_extent_info *info,
832                             struct bkey_s_c k, unsigned flags)
833 {
834         if (bkey_extent_is_direct_data(k.k)) {
835                 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
836                 const union bch_extent_entry *entry;
837                 struct extent_ptr_decoded p;
838                 int ret;
839
840                 if (k.k->type == KEY_TYPE_reflink_v)
841                         flags |= FIEMAP_EXTENT_SHARED;
842
843                 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
844                         int flags2 = 0;
845                         u64 offset = p.ptr.offset;
846
847                         if (p.ptr.unwritten)
848                                 flags2 |= FIEMAP_EXTENT_UNWRITTEN;
849
850                         if (p.crc.compression_type)
851                                 flags2 |= FIEMAP_EXTENT_ENCODED;
852                         else
853                                 offset += p.crc.offset;
854
855                         if ((offset & (block_sectors(c) - 1)) ||
856                             (k.k->size & (block_sectors(c) - 1)))
857                                 flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
858
859                         ret = fiemap_fill_next_extent(info,
860                                                 bkey_start_offset(k.k) << 9,
861                                                 offset << 9,
862                                                 k.k->size << 9, flags|flags2);
863                         if (ret)
864                                 return ret;
865                 }
866
867                 return 0;
868         } else if (bkey_extent_is_inline_data(k.k)) {
869                 return fiemap_fill_next_extent(info,
870                                                bkey_start_offset(k.k) << 9,
871                                                0, k.k->size << 9,
872                                                flags|
873                                                FIEMAP_EXTENT_DATA_INLINE);
874         } else if (k.k->type == KEY_TYPE_reservation) {
875                 return fiemap_fill_next_extent(info,
876                                                bkey_start_offset(k.k) << 9,
877                                                0, k.k->size << 9,
878                                                flags|
879                                                FIEMAP_EXTENT_DELALLOC|
880                                                FIEMAP_EXTENT_UNWRITTEN);
881         } else {
882                 BUG();
883         }
884 }
885
886 static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
887                        u64 start, u64 len)
888 {
889         struct bch_fs *c = vinode->i_sb->s_fs_info;
890         struct bch_inode_info *ei = to_bch_ei(vinode);
891         struct btree_trans *trans;
892         struct btree_iter iter;
893         struct bkey_s_c k;
894         struct bkey_buf cur, prev;
895         struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
896         unsigned offset_into_extent, sectors;
897         bool have_extent = false;
898         u32 snapshot;
899         int ret = 0;
900
901         ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC);
902         if (ret)
903                 return ret;
904
905         if (start + len < start)
906                 return -EINVAL;
907
908         start >>= 9;
909
910         bch2_bkey_buf_init(&cur);
911         bch2_bkey_buf_init(&prev);
912         trans = bch2_trans_get(c);
913 retry:
914         bch2_trans_begin(trans);
915
916         ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);
917         if (ret)
918                 goto err;
919
920         bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
921                              SPOS(ei->v.i_ino, start, snapshot), 0);
922
923         while (!(ret = btree_trans_too_many_iters(trans)) &&
924                (k = bch2_btree_iter_peek_upto(&iter, end)).k &&
925                !(ret = bkey_err(k))) {
926                 enum btree_id data_btree = BTREE_ID_extents;
927
928                 if (!bkey_extent_is_data(k.k) &&
929                     k.k->type != KEY_TYPE_reservation) {
930                         bch2_btree_iter_advance(&iter);
931                         continue;
932                 }
933
934                 offset_into_extent      = iter.pos.offset -
935                         bkey_start_offset(k.k);
936                 sectors                 = k.k->size - offset_into_extent;
937
938                 bch2_bkey_buf_reassemble(&cur, c, k);
939
940                 ret = bch2_read_indirect_extent(trans, &data_btree,
941                                         &offset_into_extent, &cur);
942                 if (ret)
943                         break;
944
945                 k = bkey_i_to_s_c(cur.k);
946                 bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
947
948                 sectors = min(sectors, k.k->size - offset_into_extent);
949
950                 bch2_cut_front(POS(k.k->p.inode,
951                                    bkey_start_offset(k.k) +
952                                    offset_into_extent),
953                                cur.k);
954                 bch2_key_resize(&cur.k->k, sectors);
955                 cur.k->k.p = iter.pos;
956                 cur.k->k.p.offset += cur.k->k.size;
957
958                 if (have_extent) {
959                         bch2_trans_unlock(trans);
960                         ret = bch2_fill_extent(c, info,
961                                         bkey_i_to_s_c(prev.k), 0);
962                         if (ret)
963                                 break;
964                 }
965
966                 bkey_copy(prev.k, cur.k);
967                 have_extent = true;
968
969                 bch2_btree_iter_set_pos(&iter,
970                         POS(iter.pos.inode, iter.pos.offset + sectors));
971         }
972         start = iter.pos.offset;
973         bch2_trans_iter_exit(trans, &iter);
974 err:
975         if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
976                 goto retry;
977
978         if (!ret && have_extent) {
979                 bch2_trans_unlock(trans);
980                 ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
981                                        FIEMAP_EXTENT_LAST);
982         }
983
984         bch2_trans_put(trans);
985         bch2_bkey_buf_exit(&cur, c);
986         bch2_bkey_buf_exit(&prev, c);
987         return ret < 0 ? ret : 0;
988 }
989
990 static const struct vm_operations_struct bch_vm_ops = {
991         .fault          = bch2_page_fault,
992         .map_pages      = filemap_map_pages,
993         .page_mkwrite   = bch2_page_mkwrite,
994 };
995
996 static int bch2_mmap(struct file *file, struct vm_area_struct *vma)
997 {
998         file_accessed(file);
999
1000         vma->vm_ops = &bch_vm_ops;
1001         return 0;
1002 }
1003
1004 /* Directories: */
1005
1006 static loff_t bch2_dir_llseek(struct file *file, loff_t offset, int whence)
1007 {
1008         return generic_file_llseek_size(file, offset, whence,
1009                                         S64_MAX, S64_MAX);
1010 }
1011
1012 static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
1013 {
1014         struct bch_inode_info *inode = file_bch_inode(file);
1015         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1016
1017         if (!dir_emit_dots(file, ctx))
1018                 return 0;
1019
1020         int ret = bch2_readdir(c, inode_inum(inode), ctx);
1021
1022         bch_err_fn(c, ret);
1023         return bch2_err_class(ret);
1024 }
1025
1026 static int bch2_open(struct inode *vinode, struct file *file)
1027 {
1028         if (file->f_flags & (O_WRONLY|O_RDWR)) {
1029                 struct bch_inode_info *inode = to_bch_ei(vinode);
1030                 struct bch_fs *c = inode->v.i_sb->s_fs_info;
1031
1032                 int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
1033                 if (ret)
1034                         return ret;
1035         }
1036
1037         return generic_file_open(vinode, file);
1038 }
1039
1040 static const struct file_operations bch_file_operations = {
1041         .open           = bch2_open,
1042         .llseek         = bch2_llseek,
1043         .read_iter      = bch2_read_iter,
1044         .write_iter     = bch2_write_iter,
1045         .mmap           = bch2_mmap,
1046         .fsync          = bch2_fsync,
1047         .splice_read    = filemap_splice_read,
1048         .splice_write   = iter_file_splice_write,
1049         .fallocate      = bch2_fallocate_dispatch,
1050         .unlocked_ioctl = bch2_fs_file_ioctl,
1051 #ifdef CONFIG_COMPAT
1052         .compat_ioctl   = bch2_compat_fs_ioctl,
1053 #endif
1054         .remap_file_range = bch2_remap_file_range,
1055 };
1056
1057 static const struct inode_operations bch_file_inode_operations = {
1058         .getattr        = bch2_getattr,
1059         .setattr        = bch2_setattr,
1060         .fiemap         = bch2_fiemap,
1061         .listxattr      = bch2_xattr_list,
1062 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1063         .get_acl        = bch2_get_acl,
1064         .set_acl        = bch2_set_acl,
1065 #endif
1066 };
1067
1068 static const struct inode_operations bch_dir_inode_operations = {
1069         .lookup         = bch2_lookup,
1070         .create         = bch2_create,
1071         .link           = bch2_link,
1072         .unlink         = bch2_unlink,
1073         .symlink        = bch2_symlink,
1074         .mkdir          = bch2_mkdir,
1075         .rmdir          = bch2_unlink,
1076         .mknod          = bch2_mknod,
1077         .rename         = bch2_rename2,
1078         .getattr        = bch2_getattr,
1079         .setattr        = bch2_setattr,
1080         .tmpfile        = bch2_tmpfile,
1081         .listxattr      = bch2_xattr_list,
1082 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1083         .get_acl        = bch2_get_acl,
1084         .set_acl        = bch2_set_acl,
1085 #endif
1086 };
1087
1088 static const struct file_operations bch_dir_file_operations = {
1089         .llseek         = bch2_dir_llseek,
1090         .read           = generic_read_dir,
1091         .iterate_shared = bch2_vfs_readdir,
1092         .fsync          = bch2_fsync,
1093         .unlocked_ioctl = bch2_fs_file_ioctl,
1094 #ifdef CONFIG_COMPAT
1095         .compat_ioctl   = bch2_compat_fs_ioctl,
1096 #endif
1097 };
1098
1099 static const struct inode_operations bch_symlink_inode_operations = {
1100         .get_link       = page_get_link,
1101         .getattr        = bch2_getattr,
1102         .setattr        = bch2_setattr,
1103         .listxattr      = bch2_xattr_list,
1104 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1105         .get_acl        = bch2_get_acl,
1106         .set_acl        = bch2_set_acl,
1107 #endif
1108 };
1109
1110 static const struct inode_operations bch_special_inode_operations = {
1111         .getattr        = bch2_getattr,
1112         .setattr        = bch2_setattr,
1113         .listxattr      = bch2_xattr_list,
1114 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1115         .get_acl        = bch2_get_acl,
1116         .set_acl        = bch2_set_acl,
1117 #endif
1118 };
1119
1120 static const struct address_space_operations bch_address_space_operations = {
1121         .read_folio     = bch2_read_folio,
1122         .writepages     = bch2_writepages,
1123         .readahead      = bch2_readahead,
1124         .dirty_folio    = filemap_dirty_folio,
1125         .write_begin    = bch2_write_begin,
1126         .write_end      = bch2_write_end,
1127         .invalidate_folio = bch2_invalidate_folio,
1128         .release_folio  = bch2_release_folio,
1129         .direct_IO      = noop_direct_IO,
1130 #ifdef CONFIG_MIGRATION
1131         .migrate_folio  = filemap_migrate_folio,
1132 #endif
1133         .error_remove_folio = generic_error_remove_folio,
1134 };
1135
/*
 * File handle payload identifying one inode, as produced by
 * bch2_inode_to_fid() and consumed by bch2_nfs_get_inode().
 *
 * The struct is packed and is written directly into the file handle buffer
 * by bch2_encode_fh(), so the field order and sizes define the handle format.
 */
struct bcachefs_fid {
        u64             inum;   /* inode number (bi_inum) */
        u32             subvol; /* subvolume the inode was accessed through */
        u32             gen;    /* inode generation, checked against i_generation */
} __packed;
1141
/*
 * File handle payload for FILEID_BCACHEFS_WITH_PARENT handles: the inode's
 * own fid followed by the fid of the directory it was encoded against.
 */
struct bcachefs_fid_with_parent {
        struct bcachefs_fid     fid;    /* the inode itself */
        struct bcachefs_fid     dir;    /* its parent directory */
} __packed;
1146
1147 static int bcachefs_fid_valid(int fh_len, int fh_type)
1148 {
1149         switch (fh_type) {
1150         case FILEID_BCACHEFS_WITHOUT_PARENT:
1151                 return fh_len == sizeof(struct bcachefs_fid) / sizeof(u32);
1152         case FILEID_BCACHEFS_WITH_PARENT:
1153                 return fh_len == sizeof(struct bcachefs_fid_with_parent) / sizeof(u32);
1154         default:
1155                 return false;
1156         }
1157 }
1158
1159 static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)
1160 {
1161         return (struct bcachefs_fid) {
1162                 .inum   = inode->ei_inode.bi_inum,
1163                 .subvol = inode->ei_subvol,
1164                 .gen    = inode->ei_inode.bi_generation,
1165         };
1166 }
1167
1168 static int bch2_encode_fh(struct inode *vinode, u32 *fh, int *len,
1169                           struct inode *vdir)
1170 {
1171         struct bch_inode_info *inode    = to_bch_ei(vinode);
1172         struct bch_inode_info *dir      = to_bch_ei(vdir);
1173         int min_len;
1174
1175         if (!S_ISDIR(inode->v.i_mode) && dir) {
1176                 struct bcachefs_fid_with_parent *fid = (void *) fh;
1177
1178                 min_len = sizeof(*fid) / sizeof(u32);
1179                 if (*len < min_len) {
1180                         *len = min_len;
1181                         return FILEID_INVALID;
1182                 }
1183
1184                 fid->fid = bch2_inode_to_fid(inode);
1185                 fid->dir = bch2_inode_to_fid(dir);
1186
1187                 *len = min_len;
1188                 return FILEID_BCACHEFS_WITH_PARENT;
1189         } else {
1190                 struct bcachefs_fid *fid = (void *) fh;
1191
1192                 min_len = sizeof(*fid) / sizeof(u32);
1193                 if (*len < min_len) {
1194                         *len = min_len;
1195                         return FILEID_INVALID;
1196                 }
1197                 *fid = bch2_inode_to_fid(inode);
1198
1199                 *len = min_len;
1200                 return FILEID_BCACHEFS_WITHOUT_PARENT;
1201         }
1202 }
1203
1204 static struct inode *bch2_nfs_get_inode(struct super_block *sb,
1205                                         struct bcachefs_fid fid)
1206 {
1207         struct bch_fs *c = sb->s_fs_info;
1208         struct inode *vinode = bch2_vfs_inode_get(c, (subvol_inum) {
1209                                     .subvol = fid.subvol,
1210                                     .inum = fid.inum,
1211         });
1212         if (!IS_ERR(vinode) && vinode->i_generation != fid.gen) {
1213                 iput(vinode);
1214                 vinode = ERR_PTR(-ESTALE);
1215         }
1216         return vinode;
1217 }
1218
1219 static struct dentry *bch2_fh_to_dentry(struct super_block *sb, struct fid *_fid,
1220                 int fh_len, int fh_type)
1221 {
1222         struct bcachefs_fid *fid = (void *) _fid;
1223
1224         if (!bcachefs_fid_valid(fh_len, fh_type))
1225                 return NULL;
1226
1227         return d_obtain_alias(bch2_nfs_get_inode(sb, *fid));
1228 }
1229
1230 static struct dentry *bch2_fh_to_parent(struct super_block *sb, struct fid *_fid,
1231                 int fh_len, int fh_type)
1232 {
1233         struct bcachefs_fid_with_parent *fid = (void *) _fid;
1234
1235         if (!bcachefs_fid_valid(fh_len, fh_type) ||
1236             fh_type != FILEID_BCACHEFS_WITH_PARENT)
1237                 return NULL;
1238
1239         return d_obtain_alias(bch2_nfs_get_inode(sb, fid->dir));
1240 }
1241
1242 static struct dentry *bch2_get_parent(struct dentry *child)
1243 {
1244         struct bch_inode_info *inode = to_bch_ei(child->d_inode);
1245         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1246         subvol_inum parent_inum = {
1247                 .subvol = inode->ei_inode.bi_parent_subvol ?:
1248                         inode->ei_subvol,
1249                 .inum = inode->ei_inode.bi_dir,
1250         };
1251
1252         return d_obtain_alias(bch2_vfs_inode_get(c, parent_inum));
1253 }
1254
/*
 * ->get_name handler: find the name under which @child is linked in the
 * directory @parent and copy it into @name.
 *
 * The name is truncated to NAME_MAX bytes and NUL-terminated, so at most
 * NAME_MAX + 1 bytes are written to @name.
 *
 * Returns 0 on success, -EINVAL if @parent is not a directory, -ENOENT if no
 * matching dirent is found, or another error code from the btree lookups.
 * The whole lookup is retried on transaction restart.
 */
static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child)
{
        struct bch_inode_info *inode    = to_bch_ei(child->d_inode);
        struct bch_inode_info *dir      = to_bch_ei(parent->d_inode);
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct btree_trans *trans;
        struct btree_iter iter1;
        struct btree_iter iter2;
        struct bkey_s_c k;
        struct bkey_s_c_dirent d;
        struct bch_inode_unpacked inode_u;
        subvol_inum target;
        u32 snapshot;
        struct qstr dirent_name;
        unsigned name_len = 0;
        int ret;

        if (!S_ISDIR(dir->v.i_mode))
                return -EINVAL;

        trans = bch2_trans_get(c);

        /*
         * iter1 is used for the direct lookup through the inode's backref,
         * iter2 for the linear scan of the directory's dirents.
         */
        bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents,
                             POS(dir->ei_inode.bi_inum, 0), 0);
        bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents,
                             POS(dir->ei_inode.bi_inum, 0), 0);
retry:
        bch2_trans_begin(trans);

        ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);
        if (ret)
                goto err;

        bch2_btree_iter_set_snapshot(&iter1, snapshot);
        bch2_btree_iter_set_snapshot(&iter2, snapshot);

        ret = bch2_inode_find_by_inum_trans(trans, inode_inum(inode), &inode_u);
        if (ret)
                goto err;

        if (inode_u.bi_dir == dir->ei_inode.bi_inum) {
                /* The inode's backref points into this directory: check that slot */
                bch2_btree_iter_set_pos(&iter1, POS(inode_u.bi_dir, inode_u.bi_dir_offset));

                k = bch2_btree_iter_peek_slot(&iter1);
                ret = bkey_err(k);
                if (ret)
                        goto err;

                if (k.k->type != KEY_TYPE_dirent) {
                        ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                        goto err;
                }

                d = bkey_s_c_to_dirent(k);
                ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
                /* A positive return is turned into the same mismatch error */
                if (ret > 0)
                        ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                if (ret)
                        goto err;

                if (target.subvol       == inode->ei_subvol &&
                    target.inum         == inode->ei_inode.bi_inum)
                        goto found;
        } else {
                /*
                 * File with multiple hardlinks and our backref is to the wrong
                 * directory - linear search:
                 */
                for_each_btree_key_continue_norestart(iter2, 0, k, ret) {
                        /* Stop once the scan has left this directory's keys */
                        if (k.k->p.inode > dir->ei_inode.bi_inum)
                                break;

                        if (k.k->type != KEY_TYPE_dirent)
                                continue;

                        d = bkey_s_c_to_dirent(k);
                        ret = bch2_dirent_read_target(trans, inode_inum(dir), d, &target);
                        if (ret < 0)
                                break;
                        /* Positive return: skip this dirent and keep scanning */
                        if (ret)
                                continue;

                        if (target.subvol       == inode->ei_subvol &&
                            target.inum         == inode->ei_inode.bi_inum)
                                goto found;
                }
        }

        /* No dirent in this directory points at the child */
        ret = -ENOENT;
        goto err;
found:
        dirent_name = bch2_dirent_get_name(d);

        name_len = min_t(unsigned, dirent_name.len, NAME_MAX);
        memcpy(name, dirent_name.name, name_len);
        name[name_len] = '\0';
err:
        /* Reached on success (ret == 0) as well as on error */
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
                goto retry;

        bch2_trans_iter_exit(trans, &iter1);
        bch2_trans_iter_exit(trans, &iter2);
        bch2_trans_put(trans);

        return ret;
}
1361
1362 static const struct export_operations bch_export_ops = {
1363         .encode_fh      = bch2_encode_fh,
1364         .fh_to_dentry   = bch2_fh_to_dentry,
1365         .fh_to_parent   = bch2_fh_to_parent,
1366         .get_parent     = bch2_get_parent,
1367         .get_name       = bch2_get_name,
1368 };
1369
1370 static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
1371                                 struct bch_inode_info *inode,
1372                                 struct bch_inode_unpacked *bi,
1373                                 struct bch_subvolume *subvol)
1374 {
1375         bch2_inode_update_after_write(trans, inode, bi, ~0);
1376
1377         if (BCH_SUBVOLUME_SNAP(subvol))
1378                 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1379         else
1380                 clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
1381
1382         inode->v.i_blocks       = bi->bi_sectors;
1383         inode->v.i_ino          = bi->bi_inum;
1384         inode->v.i_rdev         = bi->bi_dev;
1385         inode->v.i_generation   = bi->bi_generation;
1386         inode->v.i_size         = bi->bi_size;
1387
1388         inode->ei_flags         = 0;
1389         inode->ei_quota_reserved = 0;
1390         inode->ei_qid           = bch_qid(bi);
1391         inode->ei_subvol        = inum.subvol;
1392
1393         inode->v.i_mapping->a_ops = &bch_address_space_operations;
1394
1395         switch (inode->v.i_mode & S_IFMT) {
1396         case S_IFREG:
1397                 inode->v.i_op   = &bch_file_inode_operations;
1398                 inode->v.i_fop  = &bch_file_operations;
1399                 break;
1400         case S_IFDIR:
1401                 inode->v.i_op   = &bch_dir_inode_operations;
1402                 inode->v.i_fop  = &bch_dir_file_operations;
1403                 break;
1404         case S_IFLNK:
1405                 inode_nohighmem(&inode->v);
1406                 inode->v.i_op   = &bch_symlink_inode_operations;
1407                 break;
1408         default:
1409                 init_special_inode(&inode->v, inode->v.i_mode, inode->v.i_rdev);
1410                 inode->v.i_op   = &bch_special_inode_operations;
1411                 break;
1412         }
1413
1414         mapping_set_large_folios(inode->v.i_mapping);
1415 }
1416
1417 static struct inode *bch2_alloc_inode(struct super_block *sb)
1418 {
1419         struct bch_inode_info *inode;
1420
1421         inode = kmem_cache_alloc(bch2_inode_cache, GFP_NOFS);
1422         if (!inode)
1423                 return NULL;
1424
1425         inode_init_once(&inode->v);
1426         mutex_init(&inode->ei_update_lock);
1427         two_state_lock_init(&inode->ei_pagecache_lock);
1428         INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
1429         mutex_init(&inode->ei_quota_lock);
1430
1431         return &inode->v;
1432 }
1433
1434 static void bch2_i_callback(struct rcu_head *head)
1435 {
1436         struct inode *vinode = container_of(head, struct inode, i_rcu);
1437         struct bch_inode_info *inode = to_bch_ei(vinode);
1438
1439         kmem_cache_free(bch2_inode_cache, inode);
1440 }
1441
1442 static void bch2_destroy_inode(struct inode *vinode)
1443 {
1444         call_rcu(&vinode->i_rcu, bch2_i_callback);
1445 }
1446
1447 static int inode_update_times_fn(struct btree_trans *trans,
1448                                  struct bch_inode_info *inode,
1449                                  struct bch_inode_unpacked *bi,
1450                                  void *p)
1451 {
1452         struct bch_fs *c = inode->v.i_sb->s_fs_info;
1453
1454         bi->bi_atime    = timespec_to_bch2_time(c, inode_get_atime(&inode->v));
1455         bi->bi_mtime    = timespec_to_bch2_time(c, inode_get_mtime(&inode->v));
1456         bi->bi_ctime    = timespec_to_bch2_time(c, inode_get_ctime(&inode->v));
1457
1458         return 0;
1459 }
1460
1461 static int bch2_vfs_write_inode(struct inode *vinode,
1462                                 struct writeback_control *wbc)
1463 {
1464         struct bch_fs *c = vinode->i_sb->s_fs_info;
1465         struct bch_inode_info *inode = to_bch_ei(vinode);
1466         int ret;
1467
1468         mutex_lock(&inode->ei_update_lock);
1469         ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
1470                                ATTR_ATIME|ATTR_MTIME|ATTR_CTIME);
1471         mutex_unlock(&inode->ei_update_lock);
1472
1473         return bch2_err_class(ret);
1474 }
1475
1476 static void bch2_evict_inode(struct inode *vinode)
1477 {
1478         struct bch_fs *c = vinode->i_sb->s_fs_info;
1479         struct bch_inode_info *inode = to_bch_ei(vinode);
1480
1481         truncate_inode_pages_final(&inode->v.i_data);
1482
1483         clear_inode(&inode->v);
1484
1485         BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);
1486
1487         if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {
1488                 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),
1489                                 KEY_TYPE_QUOTA_WARN);
1490                 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,
1491                                 KEY_TYPE_QUOTA_WARN);
1492                 bch2_inode_rm(c, inode_inum(inode));
1493         }
1494
1495         mutex_lock(&c->vfs_inodes_lock);
1496         list_del_init(&inode->ei_vfs_inode_list);
1497         mutex_unlock(&c->vfs_inodes_lock);
1498 }
1499
/*
 * Push the cached VFS inodes whose subvolume ID is in @s out of the inode
 * cache.
 *
 * The filesystem's VFS inode list is scanned repeatedly.  The function
 * returns only after two consecutive passes in which no inode had to be
 * grabbed and no inode had to be waited for.
 */
void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s)
{
        struct bch_inode_info *inode;
        DARRAY(struct bch_inode_info *) grabbed;
        bool clean_pass = false, this_pass_clean;

        /*
         * Initially, we scan for inodes without I_DONTCACHE, then mark them to
         * be pruned with d_mark_dontcache().
         *
         * Once we've had a clean pass where we didn't find any inodes without
         * I_DONTCACHE, we wait for them to be freed:
         */

        darray_init(&grabbed);
        darray_make_room(&grabbed, 1024);
again:
        cond_resched();
        this_pass_clean = true;

        mutex_lock(&c->vfs_inodes_lock);
        list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {
                if (!snapshot_list_has_id(s, inode->ei_subvol))
                        continue;

                if (!(inode->v.i_state & I_DONTCACHE) &&
                    !(inode->v.i_state & I_FREEING) &&
                    igrab(&inode->v)) {
                        this_pass_clean = false;

                        /*
                         * The list lock is held, hence the atomic allocation.
                         * If the array cannot grow, drop the reference just
                         * taken and process what has been collected so far.
                         */
                        if (darray_push_gfp(&grabbed, inode, GFP_ATOMIC|__GFP_NOWARN)) {
                                iput(&inode->v);
                                break;
                        }
                } else if (clean_pass && this_pass_clean) {
                        /*
                         * Previous pass was clean and so is this one so far:
                         * sleep on the inode's __I_NEW bit waitqueue with the
                         * list lock dropped, then rescan from the start.
                         */
                        wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
                        DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);

                        prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
                        mutex_unlock(&c->vfs_inodes_lock);

                        schedule();
                        finish_wait(wq, &wait.wq_entry);
                        goto again;
                }
        }
        mutex_unlock(&c->vfs_inodes_lock);

        /* Outside the list lock: mark, prune and release everything grabbed */
        darray_for_each(grabbed, i) {
                inode = *i;
                d_mark_dontcache(&inode->v);
                d_prune_aliases(&inode->v);
                iput(&inode->v);
        }
        grabbed.nr = 0;

        /* Keep scanning until two passes in a row were clean */
        if (!clean_pass || !this_pass_clean) {
                clean_pass = this_pass_clean;
                goto again;
        }

        darray_exit(&grabbed);
}
1563
1564 static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
1565 {
1566         struct super_block *sb = dentry->d_sb;
1567         struct bch_fs *c = sb->s_fs_info;
1568         struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
1569         unsigned shift = sb->s_blocksize_bits - 9;
1570         /*
1571          * this assumes inodes take up 64 bytes, which is a decent average
1572          * number:
1573          */
1574         u64 avail_inodes = ((usage.capacity - usage.used) << 3);
1575         u64 fsid;
1576
1577         buf->f_type     = BCACHEFS_STATFS_MAGIC;
1578         buf->f_bsize    = sb->s_blocksize;
1579         buf->f_blocks   = usage.capacity >> shift;
1580         buf->f_bfree    = usage.free >> shift;
1581         buf->f_bavail   = avail_factor(usage.free) >> shift;
1582
1583         buf->f_files    = usage.nr_inodes + avail_inodes;
1584         buf->f_ffree    = avail_inodes;
1585
1586         fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
1587                le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
1588         buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1589         buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1590         buf->f_namelen  = BCH_NAME_MAX;
1591
1592         return 0;
1593 }
1594
1595 static int bch2_sync_fs(struct super_block *sb, int wait)
1596 {
1597         struct bch_fs *c = sb->s_fs_info;
1598         int ret;
1599
1600         if (c->opts.journal_flush_disabled)
1601                 return 0;
1602
1603         if (!wait) {
1604                 bch2_journal_flush_async(&c->journal, NULL);
1605                 return 0;
1606         }
1607
1608         ret = bch2_journal_flush(&c->journal);
1609         return bch2_err_class(ret);
1610 }
1611
1612 static struct bch_fs *bch2_path_to_fs(const char *path)
1613 {
1614         struct bch_fs *c;
1615         dev_t dev;
1616         int ret;
1617
1618         ret = lookup_bdev(path, &dev);
1619         if (ret)
1620                 return ERR_PTR(ret);
1621
1622         c = bch2_dev_to_fs(dev);
1623         if (c)
1624                 closure_put(&c->cl);
1625         return c ?: ERR_PTR(-ENOENT);
1626 }
1627
1628 static int bch2_remount(struct super_block *sb, int *flags, char *data)
1629 {
1630         struct bch_fs *c = sb->s_fs_info;
1631         struct bch_opts opts = bch2_opts_empty();
1632         int ret;
1633
1634         ret = bch2_parse_mount_opts(c, &opts, data);
1635         if (ret)
1636                 goto err;
1637
1638         opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
1639
1640         if (opts.read_only != c->opts.read_only) {
1641                 down_write(&c->state_lock);
1642
1643                 if (opts.read_only) {
1644                         bch2_fs_read_only(c);
1645
1646                         sb->s_flags |= SB_RDONLY;
1647                 } else {
1648                         ret = bch2_fs_read_write(c);
1649                         if (ret) {
1650                                 bch_err(c, "error going rw: %i", ret);
1651                                 up_write(&c->state_lock);
1652                                 ret = -EINVAL;
1653                                 goto err;
1654                         }
1655
1656                         sb->s_flags &= ~SB_RDONLY;
1657                 }
1658
1659                 c->opts.read_only = opts.read_only;
1660
1661                 up_write(&c->state_lock);
1662         }
1663
1664         if (opt_defined(opts, errors))
1665                 c->opts.errors = opts.errors;
1666 err:
1667         return bch2_err_class(ret);
1668 }
1669
1670 static int bch2_show_devname(struct seq_file *seq, struct dentry *root)
1671 {
1672         struct bch_fs *c = root->d_sb->s_fs_info;
1673         bool first = true;
1674
1675         for_each_online_member(c, ca) {
1676                 if (!first)
1677                         seq_putc(seq, ':');
1678                 first = false;
1679                 seq_puts(seq, ca->disk_sb.sb_name);
1680         }
1681
1682         return 0;
1683 }
1684
1685 static int bch2_show_options(struct seq_file *seq, struct dentry *root)
1686 {
1687         struct bch_fs *c = root->d_sb->s_fs_info;
1688         enum bch_opt_id i;
1689         struct printbuf buf = PRINTBUF;
1690         int ret = 0;
1691
1692         for (i = 0; i < bch2_opts_nr; i++) {
1693                 const struct bch_option *opt = &bch2_opt_table[i];
1694                 u64 v = bch2_opt_get_by_id(&c->opts, i);
1695
1696                 if (!(opt->flags & OPT_MOUNT))
1697                         continue;
1698
1699                 if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
1700                         continue;
1701
1702                 printbuf_reset(&buf);
1703                 bch2_opt_to_text(&buf, c, c->disk_sb.sb, opt, v,
1704                                  OPT_SHOW_MOUNT_STYLE);
1705                 seq_putc(seq, ',');
1706                 seq_puts(seq, buf.buf);
1707         }
1708
1709         if (buf.allocation_failure)
1710                 ret = -ENOMEM;
1711         printbuf_exit(&buf);
1712         return ret;
1713 }
1714
1715 static void bch2_put_super(struct super_block *sb)
1716 {
1717         struct bch_fs *c = sb->s_fs_info;
1718
1719         __bch2_fs_stop(c);
1720 }
1721
1722 /*
1723  * bcachefs doesn't currently integrate intwrite freeze protection but the
1724  * internal write references serve the same purpose. Therefore reuse the
1725  * read-only transition code to perform the quiesce. The caveat is that we don't
1726  * currently have the ability to block tasks that want a write reference while
1727  * the superblock is frozen. This is fine for now, but we should either add
1728  * blocking support or find a way to integrate sb_start_intwrite() and friends.
1729  */
1730 static int bch2_freeze(struct super_block *sb)
1731 {
1732         struct bch_fs *c = sb->s_fs_info;
1733
1734         down_write(&c->state_lock);
1735         bch2_fs_read_only(c);
1736         up_write(&c->state_lock);
1737         return 0;
1738 }
1739
1740 static int bch2_unfreeze(struct super_block *sb)
1741 {
1742         struct bch_fs *c = sb->s_fs_info;
1743         int ret;
1744
1745         if (test_bit(BCH_FS_emergency_ro, &c->flags))
1746                 return 0;
1747
1748         down_write(&c->state_lock);
1749         ret = bch2_fs_read_write(c);
1750         up_write(&c->state_lock);
1751         return ret;
1752 }
1753
1754 static const struct super_operations bch_super_operations = {
1755         .alloc_inode    = bch2_alloc_inode,
1756         .destroy_inode  = bch2_destroy_inode,
1757         .write_inode    = bch2_vfs_write_inode,
1758         .evict_inode    = bch2_evict_inode,
1759         .sync_fs        = bch2_sync_fs,
1760         .statfs         = bch2_statfs,
1761         .show_devname   = bch2_show_devname,
1762         .show_options   = bch2_show_options,
1763         .remount_fs     = bch2_remount,
1764         .put_super      = bch2_put_super,
1765         .freeze_fs      = bch2_freeze,
1766         .unfreeze_fs    = bch2_unfreeze,
1767 };
1768
1769 static int bch2_set_super(struct super_block *s, void *data)
1770 {
1771         s->s_fs_info = data;
1772         return 0;
1773 }
1774
1775 static int bch2_noset_super(struct super_block *s, void *data)
1776 {
1777         return -EBUSY;
1778 }
1779
1780 typedef DARRAY(struct bch_fs *) darray_fs;
1781
1782 static int bch2_test_super(struct super_block *s, void *data)
1783 {
1784         struct bch_fs *c = s->s_fs_info;
1785         darray_fs *d = data;
1786
1787         if (!c)
1788                 return false;
1789
1790         darray_for_each(*d, i)
1791                 if (c != *i)
1792                         return false;
1793         return true;
1794 }
1795
1796 static struct dentry *bch2_mount(struct file_system_type *fs_type,
1797                                  int flags, const char *dev_name, void *data)
1798 {
1799         struct bch_fs *c;
1800         struct super_block *sb;
1801         struct inode *vinode;
1802         struct bch_opts opts = bch2_opts_empty();
1803         int ret;
1804
1805         opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
1806
1807         ret = bch2_parse_mount_opts(NULL, &opts, data);
1808         if (ret)
1809                 return ERR_PTR(ret);
1810
1811         if (!dev_name || strlen(dev_name) == 0)
1812                 return ERR_PTR(-EINVAL);
1813
1814         darray_str devs;
1815         ret = bch2_split_devs(dev_name, &devs);
1816         if (ret)
1817                 return ERR_PTR(ret);
1818
1819         darray_fs devs_to_fs = {};
1820         darray_for_each(devs, i) {
1821                 ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
1822                 if (ret) {
1823                         sb = ERR_PTR(ret);
1824                         goto got_sb;
1825                 }
1826         }
1827
1828         sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs);
1829         if (!IS_ERR(sb))
1830                 goto got_sb;
1831
1832         c = bch2_fs_open(devs.data, devs.nr, opts);
1833         if (IS_ERR(c)) {
1834                 sb = ERR_CAST(c);
1835                 goto got_sb;
1836         }
1837
1838         /* Some options can't be parsed until after the fs is started: */
1839         ret = bch2_parse_mount_opts(c, &opts, data);
1840         if (ret) {
1841                 bch2_fs_stop(c);
1842                 sb = ERR_PTR(ret);
1843                 goto got_sb;
1844         }
1845
1846         bch2_opts_apply(&c->opts, opts);
1847
1848         sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
1849         if (IS_ERR(sb))
1850                 bch2_fs_stop(c);
1851 got_sb:
1852         darray_exit(&devs_to_fs);
1853         bch2_darray_str_exit(&devs);
1854
1855         if (IS_ERR(sb)) {
1856                 ret = PTR_ERR(sb);
1857                 ret = bch2_err_class(ret);
1858                 return ERR_PTR(ret);
1859         }
1860
1861         c = sb->s_fs_info;
1862
1863         if (sb->s_root) {
1864                 if ((flags ^ sb->s_flags) & SB_RDONLY) {
1865                         ret = -EBUSY;
1866                         goto err_put_super;
1867                 }
1868                 goto out;
1869         }
1870
1871         sb->s_blocksize         = block_bytes(c);
1872         sb->s_blocksize_bits    = ilog2(block_bytes(c));
1873         sb->s_maxbytes          = MAX_LFS_FILESIZE;
1874         sb->s_op                = &bch_super_operations;
1875         sb->s_export_op         = &bch_export_ops;
1876 #ifdef CONFIG_BCACHEFS_QUOTA
1877         sb->s_qcop              = &bch2_quotactl_operations;
1878         sb->s_quota_types       = QTYPE_MASK_USR|QTYPE_MASK_GRP|QTYPE_MASK_PRJ;
1879 #endif
1880         sb->s_xattr             = bch2_xattr_handlers;
1881         sb->s_magic             = BCACHEFS_STATFS_MAGIC;
1882         sb->s_time_gran         = c->sb.nsec_per_time_unit;
1883         sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
1884         sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
1885         c->vfs_sb               = sb;
1886         strscpy(sb->s_id, c->name, sizeof(sb->s_id));
1887
1888         ret = super_setup_bdi(sb);
1889         if (ret)
1890                 goto err_put_super;
1891
1892         sb->s_bdi->ra_pages             = VM_READAHEAD_PAGES;
1893
1894         for_each_online_member(c, ca) {
1895                 struct block_device *bdev = ca->disk_sb.bdev;
1896
1897                 /* XXX: create an anonymous device for multi device filesystems */
1898                 sb->s_bdev      = bdev;
1899                 sb->s_dev       = bdev->bd_dev;
1900                 percpu_ref_put(&ca->io_ref);
1901                 break;
1902         }
1903
1904         c->dev = sb->s_dev;
1905
1906 #ifdef CONFIG_BCACHEFS_POSIX_ACL
1907         if (c->opts.acl)
1908                 sb->s_flags     |= SB_POSIXACL;
1909 #endif
1910
1911         sb->s_shrink->seeks = 0;
1912
1913         vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
1914         ret = PTR_ERR_OR_ZERO(vinode);
1915         bch_err_msg(c, ret, "mounting: error getting root inode");
1916         if (ret)
1917                 goto err_put_super;
1918
1919         sb->s_root = d_make_root(vinode);
1920         if (!sb->s_root) {
1921                 bch_err(c, "error mounting: error allocating root dentry");
1922                 ret = -ENOMEM;
1923                 goto err_put_super;
1924         }
1925
1926         sb->s_flags |= SB_ACTIVE;
1927 out:
1928         return dget(sb->s_root);
1929
1930 err_put_super:
1931         deactivate_locked_super(sb);
1932         return ERR_PTR(bch2_err_class(ret));
1933 }
1934
1935 static void bch2_kill_sb(struct super_block *sb)
1936 {
1937         struct bch_fs *c = sb->s_fs_info;
1938
1939         generic_shutdown_super(sb);
1940         bch2_fs_free(c);
1941 }
1942
1943 static struct file_system_type bcache_fs_type = {
1944         .owner          = THIS_MODULE,
1945         .name           = "bcachefs",
1946         .mount          = bch2_mount,
1947         .kill_sb        = bch2_kill_sb,
1948         .fs_flags       = FS_REQUIRES_DEV,
1949 };
1950
1951 MODULE_ALIAS_FS("bcachefs");
1952
1953 void bch2_vfs_exit(void)
1954 {
1955         unregister_filesystem(&bcache_fs_type);
1956         kmem_cache_destroy(bch2_inode_cache);
1957 }
1958
1959 int __init bch2_vfs_init(void)
1960 {
1961         int ret = -ENOMEM;
1962
1963         bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
1964         if (!bch2_inode_cache)
1965                 goto err;
1966
1967         ret = register_filesystem(&bcache_fs_type);
1968         if (ret)
1969                 goto err;
1970
1971         return 0;
1972 err:
1973         bch2_vfs_exit();
1974         return ret;
1975 }
1976
1977 #endif /* NO_BCACHEFS_FS */