1 // SPDX-License-Identifier: GPL-2.0
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
10 #include "subvolume.h"
12 #include <linux/random.h>
14 static int bch2_subvolume_delete(struct btree_trans *, u32);
/*
 * Map a subvolume key to its position in the subvolume_children btree:
 * POS(fs_path_parent, subvolume offset).
 *
 * NOTE(review): the values returned for non-subvolume keys and for a zero
 * fs_path_parent are not visible in this listing. subvolume_children_mod()
 * only calls bch2_btree_bit_mod() for positions other than POS_MIN, so
 * presumably POS_MIN is the "no entry" value - confirm.
 */
16 static struct bpos subvolume_children_pos(struct bkey_s_c k)
18 if (k.k->type != KEY_TYPE_subvolume)
21 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
22 if (!s.v->fs_path_parent)
24 return POS(le32_to_cpu(s.v->fs_path_parent), s.k->p.offset);
/*
 * fsck one key from the subvolumes btree. Checks visible here, in order:
 *  - the snapshot the subvolume points to can be looked up
 *  - a subvolume flagged UNLINKED is deleted
 *  - the root subvolume must have fs_path_parent == 0 (repaired to 0)
 *  - a subvolume with fs_path_parent set must have a KEY_TYPE_set entry in
 *    the subvolume_children btree (repaired by setting the bit)
 *  - the subvolume's root inode must exist (otherwise the subvolume is
 *    deleted) and its bi_subvol must equal the subvolume ID (repaired)
 *  - a subvolume not flagged SNAP must be the master subvolume of its
 *    snapshot tree; otherwise the SNAP flag is set
 *
 * Both deletion paths return ret ?: -BCH_ERR_transaction_restart_nested.
 * NOTE(review): error labels and the final return are not in this listing.
 */
27 static int check_subvol(struct btree_trans *trans,
28 struct btree_iter *iter,
31 struct bch_fs *c = trans->c;
32 struct bkey_s_c_subvolume subvol;
33 struct btree_iter subvol_children_iter = {};
34 struct bch_snapshot snapshot;
35 struct printbuf buf = PRINTBUF;
39 if (k.k->type != KEY_TYPE_subvolume)
42 subvol = bkey_s_c_to_subvolume(k);
43 snapid = le32_to_cpu(subvol.v->snapshot);
44 ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
46 if (bch2_err_matches(ret, ENOENT))
47 bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
48 k.k->p.offset, snapid);
/* Unlinked subvolumes are deleted as part of the check. */
52 if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
53 ret = bch2_subvolume_delete(trans, iter->pos.offset);
54 bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
55 return ret ?: -BCH_ERR_transaction_restart_nested;
/* The root subvolume has no filesystem-path parent. */
58 if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
59 subvol.v->fs_path_parent,
60 c, subvol_root_fs_path_parent_nonzero,
61 "root subvolume has nonzero fs_path_parent\n%s",
62 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
63 struct bkey_i_subvolume *n =
64 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
65 ret = PTR_ERR_OR_ZERO(n);
69 n->v.fs_path_parent = 0;
/* Cross-check against the subvolume_children btree. */
72 if (subvol.v->fs_path_parent) {
73 struct bpos pos = subvolume_children_pos(k);
75 struct bkey_s_c subvol_children_k =
76 bch2_bkey_get_iter(trans, &subvol_children_iter,
77 BTREE_ID_subvolume_children, pos, 0);
78 ret = bkey_err(subvol_children_k);
82 if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
83 c, subvol_children_not_set,
84 "subvolume not set in subvolume_children btree at %llu:%llu\n%s",
85 pos.inode, pos.offset,
86 (printbuf_reset(&buf),
87 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
88 ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
/* Look up the subvolume's root inode; ENOENT is handled by the fsck check below. */
94 struct bch_inode_unpacked inode;
95 struct btree_iter inode_iter = {};
96 ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
97 (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
99 bch2_trans_iter_exit(trans, &inode_iter);
101 if (ret && !bch2_err_matches(ret, ENOENT))
104 if (fsck_err_on(ret, c, subvol_to_missing_root,
105 "subvolume %llu points to missing subvolume root %llu:%u",
106 k.k->p.offset, le64_to_cpu(subvol.v->inode),
107 le32_to_cpu(subvol.v->snapshot))) {
108 ret = bch2_subvolume_delete(trans, iter->pos.offset);
109 bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
110 return ret ?: -BCH_ERR_transaction_restart_nested;
113 if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
114 c, subvol_root_wrong_bi_subvol,
115 "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
116 inode.bi_inum, inode_iter.k.p.snapshot,
117 inode.bi_subvol, subvol.k->p.offset)) {
118 inode.bi_subvol = subvol.k->p.offset;
119 ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
/* A non-snapshot subvolume must be its snapshot tree's master subvolume. */
124 if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
125 u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
127 struct bch_snapshot_tree st;
130 snapshot_tree = snapshot_t(c, snapshot_root)->tree;
133 ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
135 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
136 "%s: snapshot tree %u not found", __func__, snapshot_tree);
141 if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset,
142 c, subvol_not_master_and_not_snapshot,
143 "subvolume %llu is not set as snapshot but is not master subvolume",
145 struct bkey_i_subvolume *s =
146 bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
147 ret = PTR_ERR_OR_ZERO(s);
151 SET_BCH_SUBVOLUME_SNAP(&s->v, true);
156 bch2_trans_iter_exit(trans, &subvol_children_iter);
/*
 * Run check_subvol() on every key in the subvolumes btree, starting at
 * POS_MIN, committing with BCH_TRANS_COMMIT_no_enospc.
 * NOTE(review): the return statement is not in this listing.
 */
161 int bch2_check_subvols(struct bch_fs *c)
163 int ret = bch2_trans_run(c,
164 for_each_btree_key_commit(trans, iter,
165 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
166 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
167 check_subvol(trans, &iter, k)));
/*
 * fsck one entry of the subvolume_children btree. The key's offset names
 * the child subvolume and its inode field names the expected parent.
 * The entry is deleted when the child subvolume lookup fails or when the
 * child's fs_path_parent does not match the key's inode field.
 * Lookup errors other than ENOENT take a separate path whose body is not
 * in this listing.
 */
172 static int check_subvol_child(struct btree_trans *trans,
173 struct btree_iter *child_iter,
174 struct bkey_s_c child_k)
176 struct bch_fs *c = trans->c;
177 struct bch_subvolume s;
178 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
180 if (ret && !bch2_err_matches(ret, ENOENT))
183 if (fsck_err_on(ret ||
184 le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
185 c, subvol_children_bad,
186 "incorrect entry in subvolume_children btree %llu:%llu",
187 child_k.k->p.inode, child_k.k->p.offset)) {
188 ret = bch2_btree_delete_at(trans, child_iter, 0);
/*
 * Run check_subvol_child() on every key in the subvolume_children btree,
 * starting at POS_MIN, committing with BCH_TRANS_COMMIT_no_enospc.
 * NOTE(review): the return statement is not in this listing.
 */
197 int bch2_check_subvol_children(struct bch_fs *c)
199 int ret = bch2_trans_run(c,
200 for_each_btree_key_commit(trans, iter,
201 BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
202 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
203 check_subvol_child(trans, &iter, k)));
/*
 * Key validation for subvolume keys: reports an error when the key's
 * position lies below SUBVOL_POS_MIN or above SUBVOL_POS_MAX.
 * NOTE(review): the error identifier, message and return path are not in
 * this listing.
 */
210 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
211 enum bkey_invalid_flags flags, struct printbuf *err)
215 bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
216 bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err,
/*
 * Print a subvolume key's value to @out: root inode number and snapshot ID,
 * plus creation_parent and fs_path_parent when the value extends past the
 * start of the creation_parent field.
 */
223 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
226 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
228 prt_printf(out, "root %llu snapshot id %u",
229 le64_to_cpu(s.v->inode),
230 le32_to_cpu(s.v->snapshot));
/* Only print the parent fields if the stored value is large enough to contain them. */
232 if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
233 prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
234 prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
/*
 * Set (@set == true) or clear the bit at @pos in the subvolume_children
 * btree. bch2_btree_bit_mod() is only called when @pos is not POS_MIN.
 * NOTE(review): the POS_MIN branch is not in this listing; presumably it
 * returns 0 - confirm.
 */
238 static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
240 return !bpos_eq(pos, POS_MIN)
241 ? bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set)
/*
 * Trigger for subvolume keys. In the transactional phase
 * (BTREE_TRIGGER_TRANSACTIONAL), if the subvolume_children position derived
 * from the old key differs from that of the new key, the old entry is
 * cleared and then the new entry is set.
 * NOTE(review): error handling and the return value are not in this listing.
 */
245 int bch2_subvolume_trigger(struct btree_trans *trans,
246 enum btree_id btree_id, unsigned level,
247 struct bkey_s_c old, struct bkey_s new,
250 if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
251 struct bpos children_pos_old = subvolume_children_pos(old);
252 struct bpos children_pos_new = subvolume_children_pos(new.s_c);
254 if (!bpos_eq(children_pos_old, children_pos_new)) {
255 int ret = subvolume_children_mod(trans, children_pos_old, false) ?:
256 subvolume_children_mod(trans, children_pos_new, true);
/*
 * Check whether @subvol has entries in the subvolume_children btree by
 * peeking from POS(subvol, 0). A lookup error is returned as is; otherwise,
 * if a key was found and its inode field equals @subvol, the result is
 * -BCH_ERR_ENOTEMPTY_subvol_not_empty.
 * NOTE(review): the "no children" result is not in this listing
 * (presumably 0).
 */
265 int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
267 struct btree_iter iter;
269 bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
270 struct bkey_s_c k = bch2_btree_iter_peek(&iter);
271 bch2_trans_iter_exit(trans, &iter);
273 return bkey_err(k) ?: k.k && k.k->p.inode == subvol
274 ? -BCH_ERR_ENOTEMPTY_subvol_not_empty
/*
 * Read the subvolume value stored at POS(0, @subvol) into @s.
 * If the lookup fails with ENOENT and @inconsistent_if_not_found is set,
 * the filesystem is flagged inconsistent ("missing subvolume").
 * NOTE(review): the iter_flags parameter declaration and the return
 * statement are not in this listing.
 */
278 static __always_inline int
279 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
280 bool inconsistent_if_not_found,
282 struct bch_subvolume *s)
284 int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
285 iter_flags, subvolume, s);
286 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
287 inconsistent_if_not_found,
288 trans->c, "missing subvolume %u", subvol);
/*
 * Out-of-line wrapper around bch2_subvolume_get_inlined(); all arguments
 * are forwarded unchanged.
 */
292 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
293 bool inconsistent_if_not_found,
295 struct bch_subvolume *s)
297 return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
/*
 * Look up @subvol (flagging the filesystem inconsistent if it is missing)
 * and test its RO flag.
 * NOTE(review): the values returned for the error, read-only and writable
 * cases are not in this listing.
 */
300 int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
302 struct bch_subvolume s;
303 int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s);
307 if (BCH_SUBVOLUME_RO(&s))
/*
 * Convenience wrapper: runs bch2_subvol_is_ro_trans() via bch2_trans_do()
 * for callers that do not already hold a btree_trans.
 */
312 int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
314 return bch2_trans_do(c, NULL, NULL, 0,
315 bch2_subvol_is_ro_trans(trans, subvol));
/*
 * Fetch into @subvol the subvolume referenced by snapshot node @snapshot:
 * looks up the snapshot, then reads the subvolume named by its subvol
 * field (flagging the filesystem inconsistent if that subvolume is
 * missing). Returns the first error encountered.
 */
318 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
319 struct bch_subvolume *subvol)
321 struct bch_snapshot snap;
323 return bch2_snapshot_lookup(trans, snapshot, &snap) ?:
324 bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
/*
 * Look up subvolume @subvolid and store its snapshot ID in *snapid.
 * A missing subvolume (ENOENT) flags the filesystem inconsistent.
 * NOTE(review): the snapid parameter declaration, the error check between
 * lookup and use, and the return statement are not in this listing.
 */
327 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
330 struct btree_iter iter;
331 struct bkey_s_c_subvolume subvol;
334 subvol = bch2_bkey_get_iter_typed(trans, &iter,
335 BTREE_ID_subvolumes, POS(0, subvolid),
336 BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES,
338 ret = bkey_err(subvol);
339 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
340 "missing subvolume %u", subvolid);
343 *snapid = le32_to_cpu(subvol.v->snapshot);
344 bch2_trans_iter_exit(trans, &iter);
/*
 * Per-key helper for bch2_subvolumes_reparent(): rewrite a subvolume key's
 * creation_parent to @new_parent.
 *
 * Two early-out conditions are visible: the key is not a subvolume, or its
 * value is large enough to hold creation_parent and that field differs
 * from @old_parent.
 * NOTE(review): the early-out bodies are not in this listing (presumably
 * return 0).
 */
348 static int bch2_subvolume_reparent(struct btree_trans *trans,
349 struct btree_iter *iter,
351 u32 old_parent, u32 new_parent)
353 struct bkey_i_subvolume *s;
356 if (k.k->type != KEY_TYPE_subvolume)
359 if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
360 le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
363 s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
364 ret = PTR_ERR_OR_ZERO(s);
368 s->v.creation_parent = cpu_to_le32(new_parent);
373 * Separate from the snapshot tree in the snapshots btree, we record the tree
374 * structure of how snapshot subvolumes were created - the parent subvolume of
375 * each snapshot subvolume.
377 * When a subvolume is deleted, we scan for child subvolumes and reparent them,
378 * to avoid dangling references:
/*
 * Reads the subvolume about to be deleted, then walks every key in the
 * subvolumes btree and calls bch2_subvolume_reparent() with
 * old_parent = @subvolid_to_delete and new_parent = the deleted
 * subvolume's own creation_parent.
 */
380 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
382 struct bch_subvolume s;
384 return lockrestart_do(trans,
385 bch2_subvolume_get(trans, subvolid_to_delete, true,
386 BTREE_ITER_CACHED, &s)) ?:
387 for_each_btree_key_commit(trans, iter,
388 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
389 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
390 bch2_subvolume_reparent(trans, &iter, k,
391 subvolid_to_delete, le32_to_cpu(s.creation_parent)));
395 * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
/*
 * Transaction body for deleting subvolume @subvolid: looks up the key
 * (flagging the filesystem inconsistent on ENOENT), deletes it, and marks
 * the snapshot node it pointed to as deleted.
 * NOTE(review): the error check after the lookup and the return statement
 * are not in this listing.
 */
398 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
400 struct btree_iter iter;
401 struct bkey_s_c_subvolume subvol;
405 subvol = bch2_bkey_get_iter_typed(trans, &iter,
406 BTREE_ID_subvolumes, POS(0, subvolid),
407 BTREE_ITER_CACHED|BTREE_ITER_INTENT,
409 ret = bkey_err(subvol);
410 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
411 "missing subvolume %u", subvolid);
/* Save the snapshot ID before the key is deleted. */
415 snapid = le32_to_cpu(subvol.v->snapshot);
417 ret = bch2_btree_delete_at(trans, &iter, 0) ?:
418 bch2_snapshot_node_set_deleted(trans, snapid);
419 bch2_trans_iter_exit(trans, &iter);
/*
 * Delete subvolume @subvolid: first reparent the subvolumes that name it as
 * their creation_parent, then run __bch2_subvolume_delete() and commit with
 * BCH_TRANS_COMMIT_no_enospc. Returns the first error encountered.
 */
423 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
425 return bch2_subvolumes_reparent(trans, subvolid) ?:
426 commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
427 __bch2_subvolume_delete(trans, subvolid));
/*
 * Work item (c->snapshot_wait_for_pagecache_and_delete_work): takes
 * ownership of the c->snapshots_unlinked list under its mutex, evicts the
 * inodes of those subvolumes, deletes each subvolume, then drops the
 * BCH_WRITE_REF_snapshot_delete_pagecache reference.
 * NOTE(review): local declarations, any enclosing loop and list cleanup
 * are not in this listing.
 */
430 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
432 struct bch_fs *c = container_of(work, struct bch_fs,
433 snapshot_wait_for_pagecache_and_delete_work);
/* Steal the pending list and leave an empty one behind for new entries. */
439 mutex_lock(&c->snapshots_unlinked_lock);
440 s = c->snapshots_unlinked;
441 darray_init(&c->snapshots_unlinked);
442 mutex_unlock(&c->snapshots_unlinked_lock);
447 bch2_evict_subvolume_inodes(c, &s);
449 for (id = s.data; id < s.data + s.nr; id++) {
450 ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
451 bch_err_msg(c, ret, "deleting subvolume %u", *id);
459 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Transaction commit hook used by bch2_subvolume_unlink(). The hook
 * callback recovers this struct from the embedded @h via container_of()
 * and reads a subvol member.
 * NOTE(review): the subvol member declaration is not in this listing.
 */
462 struct subvolume_unlink_hook {
463 struct btree_trans_commit_hook h;
/*
 * Commit hook for subvolume unlink: adds the subvolume ID to
 * c->snapshots_unlinked (if not already present) under the list mutex,
 * then takes a BCH_WRITE_REF_snapshot_delete_pagecache reference and queues
 * the delete work. If queue_work() reports the work was already queued,
 * the reference just taken is dropped again.
 * NOTE(review): the returns after a failed list add or a failed
 * write-ref tryget are not in this listing.
 */
467 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
468 struct btree_trans_commit_hook *_h)
470 struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
471 struct bch_fs *c = trans->c;
474 mutex_lock(&c->snapshots_unlinked_lock);
475 if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
476 ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
477 mutex_unlock(&c->snapshots_unlinked_lock);
482 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
485 if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
486 bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
/*
 * Mark subvolume @subvolid as unlinked. A subvolume_unlink_hook is
 * allocated with bch2_trans_kmalloc() and registered as a commit hook,
 * then the UNLINKED flag is set on a mutable copy of the subvolume key.
 * A missing subvolume (ENOENT) flags the filesystem inconsistent.
 * NOTE(review): the error checks and the return statement are not in this
 * listing.
 */
490 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
492 struct btree_iter iter;
493 struct bkey_i_subvolume *n;
494 struct subvolume_unlink_hook *h;
497 h = bch2_trans_kmalloc(trans, sizeof(*h));
498 ret = PTR_ERR_OR_ZERO(h);
502 h->h.fn = bch2_subvolume_wait_for_pagecache_and_delete_hook;
503 h->subvol = subvolid;
504 bch2_trans_commit_hook(trans, &h->h);
506 n = bch2_bkey_get_mut_typed(trans, &iter,
507 BTREE_ID_subvolumes, POS(0, subvolid),
508 BTREE_ITER_CACHED, subvolume);
509 ret = PTR_ERR_OR_ZERO(n);
511 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
512 "missing subvolume %u", subvolid);
516 SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
517 bch2_trans_iter_exit(trans, &iter);
/*
 * Create a new subvolume key in an empty slot of the subvolumes btree
 * (searched up to POS(0, U32_MAX)); -BCH_ERR_ENOSPC_btree_slot from the
 * slot search is translated to -BCH_ERR_ENOSPC_subvolume_create.
 *
 * The number of snapshot nodes created is 2 when src_subvolid is nonzero
 * (snapshot of an existing subvolume) and 1 otherwise. In the snapshot
 * case the source subvolume is repointed at new_nodes[1]; the new
 * subvolume always gets new_nodes[0] and has its SNAP flag set iff
 * src_subvolid != 0.
 *
 * On success *new_subvolid and *new_snapshotid are filled in.
 * NOTE(review): the remaining parameters, the conditionals guarding the
 * snapshot path, error labels and the return are not in this listing.
 */
521 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
528 struct bch_fs *c = trans->c;
529 struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
530 struct bkey_i_subvolume *new_subvol = NULL;
531 struct bkey_i_subvolume *src_subvol = NULL;
532 u32 parent = 0, new_nodes[2], snapshot_subvols[2];
535 ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
536 BTREE_ID_subvolumes, POS(0, U32_MAX));
537 if (ret == -BCH_ERR_ENOSPC_btree_slot)
538 ret = -BCH_ERR_ENOSPC_subvolume_create;
/* Subvolume IDs for the new snapshot nodes: [0] is the new subvolume, [1] the source. */
542 snapshot_subvols[0] = dst_iter.pos.offset;
543 snapshot_subvols[1] = src_subvolid;
546 /* Creating a snapshot: */
548 src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
549 BTREE_ID_subvolumes, POS(0, src_subvolid),
550 BTREE_ITER_CACHED, subvolume);
551 ret = PTR_ERR_OR_ZERO(src_subvol);
553 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
554 "subvolume %u not found", src_subvolid);
558 parent = le32_to_cpu(src_subvol->v.snapshot);
561 ret = bch2_snapshot_node_create(trans, parent, new_nodes,
563 src_subvolid ? 2 : 1);
/* The source subvolume moves to its own new snapshot node. */
568 src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
569 ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
574 new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
575 ret = PTR_ERR_OR_ZERO(new_subvol);
579 new_subvol->v.flags = 0;
580 new_subvol->v.snapshot = cpu_to_le32(new_nodes[0]);
581 new_subvol->v.inode = cpu_to_le64(inode);
582 new_subvol->v.creation_parent = cpu_to_le32(src_subvolid);
583 new_subvol->v.fs_path_parent = cpu_to_le32(parent_subvolid);
584 new_subvol->v.otime.lo = cpu_to_le64(bch2_current_time(c));
585 new_subvol->v.otime.hi = 0;
587 SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
588 SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
590 *new_subvolid = new_subvol->k.p.offset;
591 *new_snapshotid = new_nodes[0];
593 bch2_trans_iter_exit(trans, &src_iter);
594 bch2_trans_iter_exit(trans, &dst_iter);
/*
 * Build and insert three initial keys:
 *  - snapshot tree 1, with master subvolume 1 and root snapshot U32_MAX
 *  - snapshot U32_MAX, with no parent, belonging to tree 1 and pointing at
 *    BCACHEFS_ROOT_SUBVOL
 *  - subvolume BCACHEFS_ROOT_SUBVOL, pointing at snapshot U32_MAX and inode
 *    BCACHEFS_ROOT_INO
 * Inserts run in that order and stop at the first error.
 * NOTE(review): the return statement is not in this listing.
 */
598 int bch2_initialize_subvolumes(struct bch_fs *c)
600 struct bkey_i_snapshot_tree root_tree;
601 struct bkey_i_snapshot root_snapshot;
602 struct bkey_i_subvolume root_volume;
605 bkey_snapshot_tree_init(&root_tree.k_i);
606 root_tree.k.p.offset = 1;
607 root_tree.v.master_subvol = cpu_to_le32(1);
608 root_tree.v.root_snapshot = cpu_to_le32(U32_MAX);
610 bkey_snapshot_init(&root_snapshot.k_i);
611 root_snapshot.k.p.offset = U32_MAX;
612 root_snapshot.v.flags = 0;
613 root_snapshot.v.parent = 0;
614 root_snapshot.v.subvol = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
615 root_snapshot.v.tree = cpu_to_le32(1);
616 SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
618 bkey_subvolume_init(&root_volume.k_i);
619 root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
620 root_volume.v.flags = 0;
621 root_volume.v.snapshot = cpu_to_le32(U32_MAX);
622 root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO);
624 ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0) ?:
625 bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0) ?:
626 bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0);
/*
 * Transaction body: fetch the root inode at
 * SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), set its bi_subvol to
 * BCACHEFS_ROOT_SUBVOL and write it back. If the key found there is not an
 * inode, logs "root inode not found" and sets ret to -BCH_ERR_ENOENT_inode.
 * NOTE(review): error checks, labels and the return are not in this
 * listing.
 */
631 static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
633 struct btree_iter iter;
635 struct bch_inode_unpacked inode;
638 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
639 SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
644 if (!bkey_is_inode(k.k)) {
645 bch_err(trans->c, "root inode not found");
646 ret = -BCH_ERR_ENOENT_inode;
650 ret = bch2_inode_unpack(k, &inode);
653 inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
655 ret = bch2_inode_write(trans, &iter, &inode);
657 bch2_trans_iter_exit(trans, &iter);
661 /* set bi_subvol on root inode */
/*
 * Runs __bch2_fs_upgrade_for_subvolumes() in a transaction committed with
 * BCH_TRANS_COMMIT_lazy_rw.
 * NOTE(review): the return statement is not in this listing.
 */
662 int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
664 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
665 __bch2_fs_upgrade_for_subvolumes(trans));
/*
 * Initialise the subvolume/snapshot deletion machinery in @c: the
 * dead-snapshot deletion work item, the wait-for-pagecache-and-delete work
 * item, and the mutex protecting c->snapshots_unlinked.
 * NOTE(review): the return statement is not in this listing.
 */
670 int bch2_fs_subvolumes_init(struct bch_fs *c)
672 INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
673 INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
674 bch2_subvolume_wait_for_pagecache_and_delete);
675 mutex_init(&c->snapshots_unlinked_lock);