recovery.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5  */
   6
   7 #include <linux/module.h>
   8 #include <linux/slab.h>
   9 #include <linux/spinlock.h>
  10 #include <linux/completion.h>
  11 #include <linux/buffer_head.h>
  12 #include <linux/gfs2_ondisk.h>
  13 #include <linux/crc32.h>
  14 #include <linux/crc32c.h>
  15 #include <linux/ktime.h>
  16
  17 #include "gfs2.h"
  18 #include "incore.h"
  19 #include "bmap.h"
  20 #include "glock.h"
  21 #include "glops.h"
  22 #include "log.h"
  23 #include "lops.h"
  24 #include "meta_io.h"
  25 #include "recovery.h"
  26 #include "super.h"
  27 #include "util.h"
  28 #include "dir.h"
  29
/* Workqueue on which gfs2_recover_journal() queues each journal's jd_work. */
struct workqueue_struct *gfs_recovery_wq;
  31
  32 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
  33                            struct buffer_head **bh)
  34 {
  35         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
  36         struct gfs2_glock *gl = ip->i_gl;
  37         u64 dblock;
  38         u32 extlen;
  39         int error;
  40
  41         extlen = 32;
  42         error = gfs2_get_extent(&ip->i_inode, blk, &dblock, &extlen);
  43         if (error)
  44                 return error;
  45         if (!dblock) {
  46                 gfs2_consist_inode(ip);
  47                 return -EIO;
  48         }
  49
  50         *bh = gfs2_meta_ra(gl, dblock, extlen);
  51
  52         return error;
  53 }
  54
  55 int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  56 {
  57         struct list_head *head = &jd->jd_revoke_list;
  58         struct gfs2_revoke_replay *rr = NULL, *iter;
  59
  60         list_for_each_entry(iter, head, rr_list) {
  61                 if (iter->rr_blkno == blkno) {
  62                         rr = iter;
  63                         break;
  64                 }
  65         }
  66
  67         if (rr) {
  68                 rr->rr_where = where;
  69                 return 0;
  70         }
  71
  72         rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
  73         if (!rr)
  74                 return -ENOMEM;
  75
  76         rr->rr_blkno = blkno;
  77         rr->rr_where = where;
  78         list_add(&rr->rr_list, head);
  79
  80         return 1;
  81 }
  82
  83 int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
  84 {
  85         struct gfs2_revoke_replay *rr = NULL, *iter;
  86         int wrap, a, b, revoke;
  87
  88         list_for_each_entry(iter, &jd->jd_revoke_list, rr_list) {
  89                 if (iter->rr_blkno == blkno) {
  90                         rr = iter;
  91                         break;
  92                 }
  93         }
  94
  95         if (!rr)
  96                 return 0;
  97
  98         wrap = (rr->rr_where < jd->jd_replay_tail);
  99         a = (jd->jd_replay_tail < where);
 100         b = (where < rr->rr_where);
 101         revoke = (wrap) ? (a || b) : (a && b);
 102
 103         return revoke;
 104 }
 105
 106 void gfs2_revoke_clean(struct gfs2_jdesc *jd)
 107 {
 108         struct list_head *head = &jd->jd_revoke_list;
 109         struct gfs2_revoke_replay *rr;
 110
 111         while (!list_empty(head)) {
 112                 rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
 113                 list_del(&rr->rr_list);
 114                 kfree(rr);
 115         }
 116 }
 117
 118 int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
 119                      unsigned int blkno, struct gfs2_log_header_host *head)
 120 {
 121         u32 hash, crc;
 122
 123         if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
 124             lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
 125             (blkno && be32_to_cpu(lh->lh_blkno) != blkno))
 126                 return 1;
 127
 128         hash = crc32(~0, lh, LH_V1_SIZE - 4);
 129         hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
 130
 131         if (be32_to_cpu(lh->lh_hash) != hash)
 132                 return 1;
 133
 134         crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
 135                      sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
 136
 137         if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
 138                 return 1;
 139
 140         head->lh_sequence = be64_to_cpu(lh->lh_sequence);
 141         head->lh_flags = be32_to_cpu(lh->lh_flags);
 142         head->lh_tail = be32_to_cpu(lh->lh_tail);
 143         head->lh_blkno = be32_to_cpu(lh->lh_blkno);
 144
 145         head->lh_local_total = be64_to_cpu(lh->lh_local_total);
 146         head->lh_local_free = be64_to_cpu(lh->lh_local_free);
 147         head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
 148
 149         return 0;
 150 }
 151 /**
 152  * get_log_header - read the log header for a given segment
 153  * @jd: the journal
 154  * @blk: the block to look at
 155  * @head: the log header to return
 156  *
 157  * Read the log header for a given segement in a given journal.  Do a few
 158  * sanity checks on it.
 159  *
 160  * Returns: 0 on success,
 161  *          1 if the header was invalid or incomplete,
 162  *          errno on error
 163  */
 164
 165 static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
 166                           struct gfs2_log_header_host *head)
 167 {
 168         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 169         struct buffer_head *bh;
 170         int error;
 171
 172         error = gfs2_replay_read_block(jd, blk, &bh);
 173         if (error)
 174                 return error;
 175
 176         error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
 177                                  blk, head);
 178         brelse(bh);
 179
 180         return error;
 181 }
 182
 183 /**
 184  * foreach_descriptor - go through the active part of the log
 185  * @jd: the journal
 186  * @start: the first log header in the active region
 187  * @end: the last log header (don't process the contents of this entry))
 188  * @pass: iteration number (foreach_descriptor() is called in a for() loop)
 189  *
 190  * Call a given function once for every log descriptor in the active
 191  * portion of the log.
 192  *
 193  * Returns: errno
 194  */
 195
 196 static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
 197                               unsigned int end, int pass)
 198 {
 199         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 200         struct buffer_head *bh;
 201         struct gfs2_log_descriptor *ld;
 202         int error = 0;
 203         u32 length;
 204         __be64 *ptr;
 205         unsigned int offset = sizeof(struct gfs2_log_descriptor);
 206         offset += sizeof(__be64) - 1;
 207         offset &= ~(sizeof(__be64) - 1);
 208
 209         while (start != end) {
 210                 error = gfs2_replay_read_block(jd, start, &bh);
 211                 if (error)
 212                         return error;
 213                 if (gfs2_meta_check(sdp, bh)) {
 214                         brelse(bh);
 215                         return -EIO;
 216                 }
 217                 ld = (struct gfs2_log_descriptor *)bh->b_data;
 218                 length = be32_to_cpu(ld->ld_length);
 219
 220                 if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
 221                         struct gfs2_log_header_host lh;
 222                         error = get_log_header(jd, start, &lh);
 223                         if (!error) {
 224                                 gfs2_replay_incr_blk(jd, &start);
 225                                 brelse(bh);
 226                                 continue;
 227                         }
 228                         if (error == 1) {
 229                                 gfs2_consist_inode(GFS2_I(jd->jd_inode));
 230                                 error = -EIO;
 231                         }
 232                         brelse(bh);
 233                         return error;
 234                 } else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
 235                         brelse(bh);
 236                         return -EIO;
 237                 }
 238                 ptr = (__be64 *)(bh->b_data + offset);
 239                 error = lops_scan_elements(jd, start, ld, ptr, pass);
 240                 if (error) {
 241                         brelse(bh);
 242                         return error;
 243                 }
 244
 245                 while (length--)
 246                         gfs2_replay_incr_blk(jd, &start);
 247
 248                 brelse(bh);
 249         }
 250
 251         return 0;
 252 }
 253
 254 /**
 255  * clean_journal - mark a dirty journal as being clean
 256  * @jd: the journal
 257  * @head: the head journal to start from
 258  *
 259  * Returns: errno
 260  */
 261
 262 static void clean_journal(struct gfs2_jdesc *jd,
 263                           struct gfs2_log_header_host *head)
 264 {
 265         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 266         u32 lblock = head->lh_blkno;
 267
 268         gfs2_replay_incr_blk(jd, &lblock);
 269         gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
 270                               GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
 271                               REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
 272         if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
 273                 sdp->sd_log_flush_head = lblock;
 274                 gfs2_log_incr_head(sdp);
 275         }
 276 }
 277
 278
 279 static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
 280                                unsigned int message)
 281 {
 282         char env_jid[20];
 283         char env_status[20];
 284         char *envp[] = { env_jid, env_status, NULL };
 285         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 286
 287         ls->ls_recover_jid_done = jid;
 288         ls->ls_recover_jid_status = message;
 289         sprintf(env_jid, "JID=%u", jid);
 290         sprintf(env_status, "RECOVERY=%s",
 291                 message == LM_RD_SUCCESS ? "Done" : "Failed");
 292         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 293
 294         if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
 295                 sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 296 }
 297
 298 /**
 299  * update_statfs_inode - Update the master statfs inode or zero out the local
 300  *                       statfs inode for a given journal.
 301  * @jd: The journal
 302  * @head: If NULL, @inode is the local statfs inode and we need to zero it out.
 303  *        Otherwise, it @head contains the statfs change info that needs to be
 304  *        synced to the master statfs inode (pointed to by @inode).
 305  * @inode: statfs inode to update.
 306  */
 307 static int update_statfs_inode(struct gfs2_jdesc *jd,
 308                                struct gfs2_log_header_host *head,
 309                                struct inode *inode)
 310 {
 311         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 312         struct gfs2_inode *ip;
 313         struct buffer_head *bh;
 314         struct gfs2_statfs_change_host sc;
 315         int error = 0;
 316
 317         BUG_ON(!inode);
 318         ip = GFS2_I(inode);
 319
 320         error = gfs2_meta_inode_buffer(ip, &bh);
 321         if (error)
 322                 goto out;
 323
 324         spin_lock(&sdp->sd_statfs_spin);
 325
 326         if (head) { /* Update the master statfs inode */
 327                 gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
 328                 sc.sc_total += head->lh_local_total;
 329                 sc.sc_free += head->lh_local_free;
 330                 sc.sc_dinodes += head->lh_local_dinodes;
 331                 gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
 332
 333                 fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
 334                         "Free:%lld, Dinodes:%lld after change "
 335                         "[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
 336                         sc.sc_free, sc.sc_dinodes, head->lh_local_total,
 337                         head->lh_local_free, head->lh_local_dinodes);
 338         } else { /* Zero out the local statfs inode */
 339                 memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
 340                        sizeof(struct gfs2_statfs_change));
 341                 /* If it's our own journal, reset any in-memory changes too */
 342                 if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
 343                         memset(&sdp->sd_statfs_local, 0,
 344                                sizeof(struct gfs2_statfs_change_host));
 345                 }
 346         }
 347         spin_unlock(&sdp->sd_statfs_spin);
 348
 349         mark_buffer_dirty(bh);
 350         brelse(bh);
 351         gfs2_inode_metasync(ip->i_gl);
 352
 353 out:
 354         return error;
 355 }
 356
 357 /**
 358  * recover_local_statfs - Update the master and local statfs changes for this
 359  *                        journal.
 360  *
 361  * Previously, statfs updates would be read in from the local statfs inode and
 362  * synced to the master statfs inode during recovery.
 363  *
 364  * We now use the statfs updates in the journal head to update the master statfs
 365  * inode instead of reading in from the local statfs inode. To preserve backward
 366  * compatibility with kernels that can't do this, we still need to keep the
 367  * local statfs inode up to date by writing changes to it. At some point in the
 368  * future, we can do away with the local statfs inodes altogether and keep the
 369  * statfs changes solely in the journal.
 370  *
 371  * @jd: the journal
 372  * @head: the journal head
 373  *
 374  * Returns: errno
 375  */
 376 static void recover_local_statfs(struct gfs2_jdesc *jd,
 377                                  struct gfs2_log_header_host *head)
 378 {
 379         int error;
 380         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 381
 382         if (!head->lh_local_total && !head->lh_local_free
 383             && !head->lh_local_dinodes) /* No change */
 384                 goto zero_local;
 385
 386          /* First update the master statfs inode with the changes we
 387           * found in the journal. */
 388         error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
 389         if (error)
 390                 goto out;
 391
 392 zero_local:
 393         /* Zero out the local statfs inode so any changes in there
 394          * are not re-recovered. */
 395         error = update_statfs_inode(jd, NULL,
 396                                     find_local_statfs_inode(sdp, jd->jd_jid));
 397 out:
 398         return;
 399 }
 400
 401 void gfs2_recover_func(struct work_struct *work)
 402 {
 403         struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
 404         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 405         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 406         struct gfs2_log_header_host head;
 407         struct gfs2_holder j_gh, ji_gh, thaw_gh;
 408         ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
 409         int ro = 0;
 410         unsigned int pass;
 411         int error = 0;
 412         int jlocked = 0;
 413
 414         if (gfs2_withdrawn(sdp)) {
 415                 fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
 416                        jd->jd_jid);
 417                 goto fail;
 418         }
 419         t_start = ktime_get();
 420         if (sdp->sd_args.ar_spectator)
 421                 goto fail;
 422         if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
 423                 fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
 424                         jd->jd_jid);
 425                 jlocked = 1;
 426                 /* Acquire the journal lock so we can do recovery */
 427
 428                 error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
 429                                           LM_ST_EXCLUSIVE,
 430                                           LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
 431                                           &j_gh);
 432                 switch (error) {
 433                 case 0:
 434                         break;
 435
 436                 case GLR_TRYFAILED:
 437                         fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
 438                         error = 0;
 439                         goto fail;
 440
 441                 default:
 442                         goto fail;
 443                 }
 444
 445                 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
 446                                            LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
 447                 if (error)
 448                         goto fail_gunlock_j;
 449         } else {
 450                 fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
 451         }
 452
 453         t_jlck = ktime_get();
 454         fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
 455
 456         error = gfs2_jdesc_check(jd);
 457         if (error)
 458                 goto fail_gunlock_ji;
 459
 460         error = gfs2_find_jhead(jd, &head, true);
 461         if (error)
 462                 goto fail_gunlock_ji;
 463         t_jhd = ktime_get();
 464         fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
 465                 ktime_ms_delta(t_jhd, t_jlck));
 466
 467         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
 468                 fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
 469                         jd->jd_jid);
 470
 471                 /* Acquire a shared hold on the freeze lock */
 472
 473                 error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
 474                 if (error)
 475                         goto fail_gunlock_ji;
 476
 477                 if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
 478                         ro = 1;
 479                 } else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
 480                         if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 481                                 ro = 1;
 482                 } else {
 483                         if (sb_rdonly(sdp->sd_vfs)) {
 484                                 /* check if device itself is read-only */
 485                                 ro = bdev_read_only(sdp->sd_vfs->s_bdev);
 486                                 if (!ro) {
 487                                         fs_info(sdp, "recovery required on "
 488                                                 "read-only filesystem.\n");
 489                                         fs_info(sdp, "write access will be "
 490                                                 "enabled during recovery.\n");
 491                                 }
 492                         }
 493                 }
 494
 495                 if (ro) {
 496                         fs_warn(sdp, "jid=%u: Can't replay: read-only block "
 497                                 "device\n", jd->jd_jid);
 498                         error = -EROFS;
 499                         goto fail_gunlock_thaw;
 500                 }
 501
 502                 t_tlck = ktime_get();
 503                 fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
 504                         jd->jd_jid, head.lh_tail, head.lh_blkno);
 505
 506                 /* We take the sd_log_flush_lock here primarily to prevent log
 507                  * flushes and simultaneous journal replays from stomping on
 508                  * each other wrt jd_log_bio. */
 509                 down_read(&sdp->sd_log_flush_lock);
 510                 for (pass = 0; pass < 2; pass++) {
 511                         lops_before_scan(jd, &head, pass);
 512                         error = foreach_descriptor(jd, head.lh_tail,
 513                                                    head.lh_blkno, pass);
 514                         lops_after_scan(jd, error, pass);
 515                         if (error) {
 516                                 up_read(&sdp->sd_log_flush_lock);
 517                                 goto fail_gunlock_thaw;
 518                         }
 519                 }
 520
 521                 recover_local_statfs(jd, &head);
 522                 clean_journal(jd, &head);
 523                 up_read(&sdp->sd_log_flush_lock);
 524
 525                 gfs2_freeze_unlock(&thaw_gh);
 526                 t_rep = ktime_get();
 527                 fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
 528                         "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
 529                         jd->jd_jid, ktime_ms_delta(t_rep, t_start),
 530                         ktime_ms_delta(t_jlck, t_start),
 531                         ktime_ms_delta(t_jhd, t_jlck),
 532                         ktime_ms_delta(t_tlck, t_jhd),
 533                         ktime_ms_delta(t_rep, t_tlck));
 534         }
 535
 536         gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
 537
 538         if (jlocked) {
 539                 gfs2_glock_dq_uninit(&ji_gh);
 540                 gfs2_glock_dq_uninit(&j_gh);
 541         }
 542
 543         fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
 544         goto done;
 545
 546 fail_gunlock_thaw:
 547         gfs2_freeze_unlock(&thaw_gh);
 548 fail_gunlock_ji:
 549         if (jlocked) {
 550                 gfs2_glock_dq_uninit(&ji_gh);
 551 fail_gunlock_j:
 552                 gfs2_glock_dq_uninit(&j_gh);
 553         }
 554
 555         fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 556 fail:
 557         jd->jd_recover_error = error;
 558         gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 559 done:
 560         clear_bit(JDF_RECOVERY, &jd->jd_flags);
 561         smp_mb__after_atomic();
 562         wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 563 }
 564
 565 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
 566 {
 567         int rv;
 568
 569         if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
 570                 return -EBUSY;
 571
 572         /* we have JDF_RECOVERY, queue should always succeed */
 573         rv = queue_work(gfs_recovery_wq, &jd->jd_work);
 574         BUG_ON(!rv);
 575
 576         if (wait)
 577                 wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
 578                             TASK_UNINTERRUPTIBLE);
 579
 580         return wait ? jd->jd_recover_error : 0;
 581 }
 582