fs/bcachefs/sb-clean.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include "bcachefs.h"
   4 #include "btree_update_interior.h"
   5 #include "buckets.h"
   6 #include "error.h"
   7 #include "journal_io.h"
   8 #include "replicas.h"
   9 #include "sb-clean.h"
  10 #include "super-io.h"
  11
  12 /*
  13  * BCH_SB_FIELD_clean:
  14  *
  15  * Btree roots, and a few other things, are recovered from the journal after an
  16  * unclean shutdown - but after a clean shutdown, to avoid having to read the
  17  * journal, we can store them in the superblock.
  18  *
  19  * bch_sb_field_clean simply contains a list of journal entries, stored exactly
  20  * as they would be in the journal:
  21  */
  22
  23 int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *clean,
  24                                 int write)
  25 {
  26         struct jset_entry *entry;
  27         int ret;
  28
  29         for (entry = clean->start;
  30              entry < (struct jset_entry *) vstruct_end(&clean->field);
  31              entry = vstruct_next(entry)) {
  32                 if (vstruct_end(entry) > vstruct_end(&clean->field)) {
  33                         bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu",
  34                                 le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s),
  35                                 (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field));
  36                         bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun);
  37                         return -BCH_ERR_fsck_repair_unimplemented;
  38                 }
  39
  40                 ret = bch2_journal_entry_validate(c, NULL, entry,
  41                                                   le16_to_cpu(c->disk_sb.sb->version),
  42                                                   BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
  43                                                   write);
  44                 if (ret)
  45                         return ret;
  46         }
  47
  48         return 0;
  49 }
  50
  51 static struct bkey_i *btree_root_find(struct bch_fs *c,
  52                                       struct bch_sb_field_clean *clean,
  53                                       struct jset *j,
  54                                       enum btree_id id, unsigned *level)
  55 {
  56         struct bkey_i *k;
  57         struct jset_entry *entry, *start, *end;
  58
  59         if (clean) {
  60                 start = clean->start;
  61                 end = vstruct_end(&clean->field);
  62         } else {
  63                 start = j->start;
  64                 end = vstruct_last(j);
  65         }
  66
  67         for (entry = start; entry < end; entry = vstruct_next(entry))
  68                 if (entry->type == BCH_JSET_ENTRY_btree_root &&
  69                     entry->btree_id == id)
  70                         goto found;
  71
  72         return NULL;
  73 found:
  74         if (!entry->u64s)
  75                 return ERR_PTR(-EINVAL);
  76
  77         k = entry->start;
  78         *level = entry->level;
  79         return k;
  80 }
  81
  82 int bch2_verify_superblock_clean(struct bch_fs *c,
  83                                  struct bch_sb_field_clean **cleanp,
  84                                  struct jset *j)
  85 {
  86         unsigned i;
  87         struct bch_sb_field_clean *clean = *cleanp;
  88         struct printbuf buf1 = PRINTBUF;
  89         struct printbuf buf2 = PRINTBUF;
  90         int ret = 0;
  91
  92         if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
  93                         sb_clean_journal_seq_mismatch,
  94                         "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
  95                         le64_to_cpu(clean->journal_seq),
  96                         le64_to_cpu(j->seq))) {
  97                 kfree(clean);
  98                 *cleanp = NULL;
  99                 return 0;
 100         }
 101
 102         for (i = 0; i < BTREE_ID_NR; i++) {
 103                 struct bkey_i *k1, *k2;
 104                 unsigned l1 = 0, l2 = 0;
 105
 106                 k1 = btree_root_find(c, clean, NULL, i, &l1);
 107                 k2 = btree_root_find(c, NULL, j, i, &l2);
 108
 109                 if (!k1 && !k2)
 110                         continue;
 111
 112                 printbuf_reset(&buf1);
 113                 printbuf_reset(&buf2);
 114
 115                 if (k1)
 116                         bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(k1));
 117                 else
 118                         prt_printf(&buf1, "(none)");
 119
 120                 if (k2)
 121                         bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(k2));
 122                 else
 123                         prt_printf(&buf2, "(none)");
 124
 125                 mustfix_fsck_err_on(!k1 || !k2 ||
 126                                     IS_ERR(k1) ||
 127                                     IS_ERR(k2) ||
 128                                     k1->k.u64s != k2->k.u64s ||
 129                                     memcmp(k1, k2, bkey_bytes(&k1->k)) ||
 130                                     l1 != l2, c,
 131                         sb_clean_btree_root_mismatch,
 132                         "superblock btree root %u doesn't match journal after clean shutdown\n"
 133                         "sb:      l=%u %s\n"
 134                         "journal: l=%u %s\n", i,
 135                         l1, buf1.buf,
 136                         l2, buf2.buf);
 137         }
 138 fsck_err:
 139         printbuf_exit(&buf2);
 140         printbuf_exit(&buf1);
 141         return ret;
 142 }
 143
 144 struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c)
 145 {
 146         struct bch_sb_field_clean *clean, *sb_clean;
 147         int ret;
 148
 149         mutex_lock(&c->sb_lock);
 150         sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean);
 151
 152         if (fsck_err_on(!sb_clean, c,
 153                         sb_clean_missing,
 154                         "superblock marked clean but clean section not present")) {
 155                 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
 156                 c->sb.clean = false;
 157                 mutex_unlock(&c->sb_lock);
 158                 return NULL;
 159         }
 160
 161         clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field),
 162                         GFP_KERNEL);
 163         if (!clean) {
 164                 mutex_unlock(&c->sb_lock);
 165                 return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean);
 166         }
 167
 168         ret = bch2_sb_clean_validate_late(c, clean, READ);
 169         if (ret) {
 170                 mutex_unlock(&c->sb_lock);
 171                 return ERR_PTR(ret);
 172         }
 173
 174         mutex_unlock(&c->sb_lock);
 175
 176         return clean;
 177 fsck_err:
 178         mutex_unlock(&c->sb_lock);
 179         return ERR_PTR(ret);
 180 }
 181
 182 void bch2_journal_super_entries_add_common(struct bch_fs *c,
 183                                            struct jset_entry **end,
 184                                            u64 journal_seq)
 185 {
 186         percpu_down_read(&c->mark_lock);
 187
 188         if (!journal_seq) {
 189                 for (unsigned i = 0; i < ARRAY_SIZE(c->usage); i++)
 190                         bch2_fs_usage_acc_to_base(c, i);
 191         } else {
 192                 bch2_fs_usage_acc_to_base(c, journal_seq & JOURNAL_BUF_MASK);
 193         }
 194
 195         {
 196                 struct jset_entry_usage *u =
 197                         container_of(jset_entry_init(end, sizeof(*u)),
 198                                      struct jset_entry_usage, entry);
 199
 200                 u->entry.type   = BCH_JSET_ENTRY_usage;
 201                 u->entry.btree_id = BCH_FS_USAGE_inodes;
 202                 u->v            = cpu_to_le64(c->usage_base->b.nr_inodes);
 203         }
 204
 205         {
 206                 struct jset_entry_usage *u =
 207                         container_of(jset_entry_init(end, sizeof(*u)),
 208                                      struct jset_entry_usage, entry);
 209
 210                 u->entry.type   = BCH_JSET_ENTRY_usage;
 211                 u->entry.btree_id = BCH_FS_USAGE_key_version;
 212                 u->v            = cpu_to_le64(atomic64_read(&c->key_version));
 213         }
 214
 215         for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
 216                 struct jset_entry_usage *u =
 217                         container_of(jset_entry_init(end, sizeof(*u)),
 218                                      struct jset_entry_usage, entry);
 219
 220                 u->entry.type   = BCH_JSET_ENTRY_usage;
 221                 u->entry.btree_id = BCH_FS_USAGE_reserved;
 222                 u->entry.level  = i;
 223                 u->v            = cpu_to_le64(c->usage_base->persistent_reserved[i]);
 224         }
 225
 226         for (unsigned i = 0; i < c->replicas.nr; i++) {
 227                 struct bch_replicas_entry_v1 *e =
 228                         cpu_replicas_entry(&c->replicas, i);
 229                 struct jset_entry_data_usage *u =
 230                         container_of(jset_entry_init(end, sizeof(*u) + e->nr_devs),
 231                                      struct jset_entry_data_usage, entry);
 232
 233                 u->entry.type   = BCH_JSET_ENTRY_data_usage;
 234                 u->v            = cpu_to_le64(c->usage_base->replicas[i]);
 235                 unsafe_memcpy(&u->r, e, replicas_entry_bytes(e),
 236                               "embedded variable length struct");
 237         }
 238
 239         for_each_member_device(c, ca) {
 240                 unsigned b = sizeof(struct jset_entry_dev_usage) +
 241                         sizeof(struct jset_entry_dev_usage_type) * BCH_DATA_NR;
 242                 struct jset_entry_dev_usage *u =
 243                         container_of(jset_entry_init(end, b),
 244                                      struct jset_entry_dev_usage, entry);
 245
 246                 u->entry.type = BCH_JSET_ENTRY_dev_usage;
 247                 u->dev = cpu_to_le32(ca->dev_idx);
 248
 249                 for (unsigned i = 0; i < BCH_DATA_NR; i++) {
 250                         u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
 251                         u->d[i].sectors = cpu_to_le64(ca->usage_base->d[i].sectors);
 252                         u->d[i].fragmented = cpu_to_le64(ca->usage_base->d[i].fragmented);
 253                 }
 254         }
 255
 256         percpu_up_read(&c->mark_lock);
 257
 258         for (unsigned i = 0; i < 2; i++) {
 259                 struct jset_entry_clock *clock =
 260                         container_of(jset_entry_init(end, sizeof(*clock)),
 261                                      struct jset_entry_clock, entry);
 262
 263                 clock->entry.type = BCH_JSET_ENTRY_clock;
 264                 clock->rw       = i;
 265                 clock->time     = cpu_to_le64(atomic64_read(&c->io_clock[i].now));
 266         }
 267 }
 268
 269 static int bch2_sb_clean_validate(struct bch_sb *sb,
 270                                   struct bch_sb_field *f,
 271                                   struct printbuf *err)
 272 {
 273         struct bch_sb_field_clean *clean = field_to_type(f, clean);
 274
 275         if (vstruct_bytes(&clean->field) < sizeof(*clean)) {
 276                 prt_printf(err, "wrong size (got %zu should be %zu)",
 277                        vstruct_bytes(&clean->field), sizeof(*clean));
 278                 return -BCH_ERR_invalid_sb_clean;
 279         }
 280
 281         for (struct jset_entry *entry = clean->start;
 282              entry != vstruct_end(&clean->field);
 283              entry = vstruct_next(entry)) {
 284                 if ((void *) vstruct_next(entry) > vstruct_end(&clean->field)) {
 285                         prt_str(err, "entry type ");
 286                         bch2_prt_jset_entry_type(err, le16_to_cpu(entry->type));
 287                         prt_str(err, " overruns end of section");
 288                         return -BCH_ERR_invalid_sb_clean;
 289                 }
 290         }
 291
 292         return 0;
 293 }
 294
 295 static void bch2_sb_clean_to_text(struct printbuf *out, struct bch_sb *sb,
 296                                   struct bch_sb_field *f)
 297 {
 298         struct bch_sb_field_clean *clean = field_to_type(f, clean);
 299         struct jset_entry *entry;
 300
 301         prt_printf(out, "flags:          %x",   le32_to_cpu(clean->flags));
 302         prt_newline(out);
 303         prt_printf(out, "journal_seq:    %llu", le64_to_cpu(clean->journal_seq));
 304         prt_newline(out);
 305
 306         for (entry = clean->start;
 307              entry != vstruct_end(&clean->field);
 308              entry = vstruct_next(entry)) {
 309                 if ((void *) vstruct_next(entry) > vstruct_end(&clean->field))
 310                         break;
 311
 312                 if (entry->type == BCH_JSET_ENTRY_btree_keys &&
 313                     !entry->u64s)
 314                         continue;
 315
 316                 bch2_journal_entry_to_text(out, NULL, entry);
 317                 prt_newline(out);
 318         }
 319 }
 320
 321 const struct bch_sb_field_ops bch_sb_field_ops_clean = {
 322         .validate       = bch2_sb_clean_validate,
 323         .to_text        = bch2_sb_clean_to_text,
 324 };
 325
 326 int bch2_fs_mark_dirty(struct bch_fs *c)
 327 {
 328         int ret;
 329
 330         /*
 331          * Unconditionally write superblock, to verify it hasn't changed before
 332          * we go rw:
 333          */
 334
 335         mutex_lock(&c->sb_lock);
 336         SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
 337         c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
 338
 339         ret = bch2_write_super(c);
 340         mutex_unlock(&c->sb_lock);
 341
 342         return ret;
 343 }
 344
 345 void bch2_fs_mark_clean(struct bch_fs *c)
 346 {
 347         struct bch_sb_field_clean *sb_clean;
 348         struct jset_entry *entry;
 349         unsigned u64s;
 350         int ret;
 351
 352         mutex_lock(&c->sb_lock);
 353         if (BCH_SB_CLEAN(c->disk_sb.sb))
 354                 goto out;
 355
 356         SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
 357
 358         c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
 359         c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_metadata);
 360         c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_extents_above_btree_updates));
 361         c->disk_sb.sb->features[0] &= cpu_to_le64(~(1ULL << BCH_FEATURE_btree_updates_journalled));
 362
 363         u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;
 364
 365         sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s);
 366         if (!sb_clean) {
 367                 bch_err(c, "error resizing superblock while setting filesystem clean");
 368                 goto out;
 369         }
 370
 371         sb_clean->flags         = 0;
 372         sb_clean->journal_seq   = cpu_to_le64(atomic64_read(&c->journal.seq));
 373
 374         /* Trying to catch outstanding bug: */
 375         BUG_ON(le64_to_cpu(sb_clean->journal_seq) > S64_MAX);
 376
 377         entry = sb_clean->start;
 378         bch2_journal_super_entries_add_common(c, &entry, 0);
 379         entry = bch2_btree_roots_to_journal_entries(c, entry, 0);
 380         BUG_ON((void *) entry > vstruct_end(&sb_clean->field));
 381
 382         memset(entry, 0,
 383                vstruct_end(&sb_clean->field) - (void *) entry);
 384
 385         /*
 386          * this should be in the write path, and we should be validating every
 387          * superblock section:
 388          */
 389         ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE);
 390         if (ret) {
 391                 bch_err(c, "error writing marking filesystem clean: validate error");
 392                 goto out;
 393         }
 394
 395         bch2_write_super(c);
 396 out:
 397         mutex_unlock(&c->sb_lock);
 398 }