fs/gfs2/util.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5  */
   6
   7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9 #include <linux/spinlock.h>
  10 #include <linux/completion.h>
  11 #include <linux/buffer_head.h>
  12 #include <linux/crc32.h>
  13 #include <linux/gfs2_ondisk.h>
  14 #include <linux/delay.h>
  15 #include <linux/uaccess.h>
  16
  17 #include "gfs2.h"
  18 #include "incore.h"
  19 #include "glock.h"
  20 #include "glops.h"
  21 #include "log.h"
  22 #include "lops.h"
  23 #include "recovery.h"
  24 #include "rgrp.h"
  25 #include "super.h"
  26 #include "util.h"
  27
/*
 * Global slab cache pointers and the page mempool pointer used by GFS2.
 * They are defined here without initializers, and nothing in the code
 * visible in this file assigns them; presumably they are created and
 * destroyed by the module init/exit code -- TODO confirm.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
  37
  38 void gfs2_assert_i(struct gfs2_sbd *sdp)
  39 {
  40         fs_emerg(sdp, "fatal assertion failed\n");
  41 }
  42
  43 /**
  44  * check_journal_clean - Make sure a journal is clean for a spectator mount
  45  * @sdp: The GFS2 superblock
  46  * @jd: The journal descriptor
  47  *
  48  * Returns: 0 if the journal is clean or locked, else an error
  49  */
  50 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
  51                         bool verbose)
  52 {
  53         int error;
  54         struct gfs2_holder j_gh;
  55         struct gfs2_log_header_host head;
  56         struct gfs2_inode *ip;
  57
  58         ip = GFS2_I(jd->jd_inode);
  59         error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
  60                                    GL_EXACT | GL_NOCACHE, &j_gh);
  61         if (error) {
  62                 if (verbose)
  63                         fs_err(sdp, "Error %d locking journal for spectator "
  64                                "mount.\n", error);
  65                 return -EPERM;
  66         }
  67         error = gfs2_jdesc_check(jd);
  68         if (error) {
  69                 if (verbose)
  70                         fs_err(sdp, "Error checking journal for spectator "
  71                                "mount.\n");
  72                 goto out_unlock;
  73         }
  74         error = gfs2_find_jhead(jd, &head, false);
  75         if (error) {
  76                 if (verbose)
  77                         fs_err(sdp, "Error parsing journal for spectator "
  78                                "mount.\n");
  79                 goto out_unlock;
  80         }
  81         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
  82                 error = -EPERM;
  83                 if (verbose)
  84                         fs_err(sdp, "jid=%u: Journal is dirty, so the first "
  85                                "mounter must not be a spectator.\n",
  86                                jd->jd_jid);
  87         }
  88
  89 out_unlock:
  90         gfs2_glock_dq_uninit(&j_gh);
  91         return error;
  92 }
  93
  94 /**
  95  * gfs2_freeze_lock - hold the freeze glock
  96  * @sdp: the superblock
  97  * @freeze_gh: pointer to the requested holder
  98  * @caller_flags: any additional flags needed by the caller
  99  */
 100 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
 101                      int caller_flags)
 102 {
 103         int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
 104         int error;
 105
 106         error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
 107                                    freeze_gh);
 108         if (error && error != GLR_TRYFAILED)
 109                 fs_err(sdp, "can't lock the freeze lock: %d\n", error);
 110         return error;
 111 }
 112
 113 void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
 114 {
 115         if (gfs2_holder_initialized(freeze_gh))
 116                 gfs2_glock_dq_uninit(freeze_gh);
 117 }
 118
 119 static void signal_our_withdraw(struct gfs2_sbd *sdp)
 120 {
 121         struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
 122         struct inode *inode;
 123         struct gfs2_inode *ip;
 124         struct gfs2_glock *i_gl;
 125         u64 no_formal_ino;
 126         int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 127         int ret = 0;
 128         int tries;
 129
 130         if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
 131                 return;
 132
 133         inode = sdp->sd_jdesc->jd_inode;
 134         ip = GFS2_I(inode);
 135         i_gl = ip->i_gl;
 136         no_formal_ino = ip->i_no_formal_ino;
 137
 138         /* Prevent any glock dq until withdraw recovery is complete */
 139         set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 140         /*
 141          * Don't tell dlm we're bailing until we have no more buffers in the
 142          * wind. If journal had an IO error, the log code should just purge
 143          * the outstanding buffers rather than submitting new IO. Making the
 144          * file system read-only will flush the journal, etc.
 145          *
 146          * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
 147          * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
 148          * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
 149          * therefore we need to clear SDF_JOURNAL_LIVE manually.
 150          */
 151         clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 152         if (!sb_rdonly(sdp->sd_vfs)) {
 153                 struct gfs2_holder freeze_gh;
 154
 155                 gfs2_holder_mark_uninitialized(&freeze_gh);
 156                 if (sdp->sd_freeze_gl &&
 157                     !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
 158                         ret = gfs2_freeze_lock(sdp, &freeze_gh,
 159                                        log_write_allowed ? 0 : LM_FLAG_TRY);
 160                         if (ret == GLR_TRYFAILED)
 161                                 ret = 0;
 162                 }
 163                 if (!ret)
 164                         ret = gfs2_make_fs_ro(sdp);
 165                 gfs2_freeze_unlock(&freeze_gh);
 166         }
 167
 168         if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
 169                 if (!ret)
 170                         ret = -EIO;
 171                 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 172                 goto skip_recovery;
 173         }
 174         /*
 175          * Drop the glock for our journal so another node can recover it.
 176          */
 177         if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
 178                 gfs2_glock_dq_wait(&sdp->sd_journal_gh);
 179                 gfs2_holder_uninit(&sdp->sd_journal_gh);
 180         }
 181         sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
 182         gfs2_glock_dq(&sdp->sd_jinode_gh);
 183         if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
 184                 /* Make sure gfs2_unfreeze works if partially-frozen */
 185                 flush_workqueue(gfs2_freeze_wq);
 186                 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 187                 thaw_super(sdp->sd_vfs);
 188         } else {
 189                 wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
 190                             TASK_UNINTERRUPTIBLE);
 191         }
 192
 193         /*
 194          * holder_uninit to force glock_put, to force dlm to let go
 195          */
 196         gfs2_holder_uninit(&sdp->sd_jinode_gh);
 197
 198         /*
 199          * Note: We need to be careful here:
 200          * Our iput of jd_inode will evict it. The evict will dequeue its
 201          * glock, but the glock dq will wait for the withdraw unless we have
 202          * exception code in glock_dq.
 203          */
 204         iput(inode);
 205         /*
 206          * Wait until the journal inode's glock is freed. This allows try locks
 207          * on other nodes to be successful, otherwise we remain the owner of
 208          * the glock as far as dlm is concerned.
 209          */
 210         if (i_gl->gl_ops->go_free) {
 211                 set_bit(GLF_FREEING, &i_gl->gl_flags);
 212                 wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
 213         }
 214
 215         /*
 216          * Dequeue the "live" glock, but keep a reference so it's never freed.
 217          */
 218         gfs2_glock_hold(live_gl);
 219         gfs2_glock_dq_wait(&sdp->sd_live_gh);
 220         /*
 221          * We enqueue the "live" glock in EX so that all other nodes
 222          * get a demote request and act on it. We don't really want the
 223          * lock in EX, so we send a "try" lock with 1CB to produce a callback.
 224          */
 225         fs_warn(sdp, "Requesting recovery of jid %d.\n",
 226                 sdp->sd_lockstruct.ls_jid);
 227         gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
 228                            &sdp->sd_live_gh);
 229         msleep(GL_GLOCK_MAX_HOLD);
 230         /*
 231          * This will likely fail in a cluster, but succeed standalone:
 232          */
 233         ret = gfs2_glock_nq(&sdp->sd_live_gh);
 234
 235         /*
 236          * If we actually got the "live" lock in EX mode, there are no other
 237          * nodes available to replay our journal. So we try to replay it
 238          * ourselves. We hold the "live" glock to prevent other mounters
 239          * during recovery, then just dequeue it and reacquire it in our
 240          * normal SH mode. Just in case the problem that caused us to
 241          * withdraw prevents us from recovering our journal (e.g. io errors
 242          * and such) we still check if the journal is clean before proceeding
 243          * but we may wait forever until another mounter does the recovery.
 244          */
 245         if (ret == 0) {
 246                 fs_warn(sdp, "No other mounters found. Trying to recover our "
 247                         "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
 248                 if (gfs2_recover_journal(sdp->sd_jdesc, 1))
 249                         fs_warn(sdp, "Unable to recover our journal jid %d.\n",
 250                                 sdp->sd_lockstruct.ls_jid);
 251                 gfs2_glock_dq_wait(&sdp->sd_live_gh);
 252                 gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
 253                                    &sdp->sd_live_gh);
 254                 gfs2_glock_nq(&sdp->sd_live_gh);
 255         }
 256
 257         gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
 258         clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 259
 260         /*
 261          * At this point our journal is evicted, so we need to get a new inode
 262          * for it. Once done, we need to call gfs2_find_jhead which
 263          * calls gfs2_map_journal_extents to map it for us again.
 264          *
 265          * Note that we don't really want it to look up a FREE block. The
 266          * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
 267          * which would otherwise fail because it requires grabbing an rgrp
 268          * glock, which would fail with -EIO because we're withdrawing.
 269          */
 270         inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
 271                                   sdp->sd_jdesc->jd_no_addr, no_formal_ino,
 272                                   GFS2_BLKST_FREE);
 273         if (IS_ERR(inode)) {
 274                 fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
 275                         sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
 276                 goto skip_recovery;
 277         }
 278         sdp->sd_jdesc->jd_inode = inode;
 279
 280         /*
 281          * Now wait until recovery is complete.
 282          */
 283         for (tries = 0; tries < 10; tries++) {
 284                 ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
 285                 if (!ret)
 286                         break;
 287                 msleep(HZ);
 288                 fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
 289                         sdp->sd_lockstruct.ls_jid);
 290         }
 291 skip_recovery:
 292         if (!ret)
 293                 fs_warn(sdp, "Journal recovery complete for jid %d.\n",
 294                         sdp->sd_lockstruct.ls_jid);
 295         else
 296                 fs_warn(sdp, "Journal recovery skipped for %d until next "
 297                         "mount.\n", sdp->sd_lockstruct.ls_jid);
 298         fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
 299         sdp->sd_glock_dqs_held = 0;
 300         wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
 301 }
 302
 303 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
 304 {
 305         struct va_format vaf;
 306         va_list args;
 307
 308         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 309             test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
 310                 return;
 311
 312         va_start(args, fmt);
 313         vaf.fmt = fmt;
 314         vaf.va = &args;
 315         fs_err(sdp, "%pV", &vaf);
 316         va_end(args);
 317 }
 318
 319 int gfs2_withdraw(struct gfs2_sbd *sdp)
 320 {
 321         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 322         const struct lm_lockops *lm = ls->ls_ops;
 323
 324         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 325             test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
 326                 if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
 327                         return -1;
 328
 329                 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
 330                             TASK_UNINTERRUPTIBLE);
 331                 return -1;
 332         }
 333
 334         set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 335
 336         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 337                 fs_err(sdp, "about to withdraw this file system\n");
 338                 BUG_ON(sdp->sd_args.ar_debug);
 339
 340                 signal_our_withdraw(sdp);
 341
 342                 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 343
 344                 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
 345                         wait_for_completion(&sdp->sd_wdack);
 346
 347                 if (lm->lm_unmount) {
 348                         fs_err(sdp, "telling LM to unmount\n");
 349                         lm->lm_unmount(sdp);
 350                 }
 351                 fs_err(sdp, "File system withdrawn\n");
 352                 dump_stack();
 353                 clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 354                 smp_mb__after_atomic();
 355                 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
 356         }
 357
 358         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 359                 panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
 360
 361         return -1;
 362 }
 363
 364 /**
 365  * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 366  */
 367
 368 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
 369                             const char *function, char *file, unsigned int line,
 370                             bool delayed)
 371 {
 372         if (gfs2_withdrawn(sdp))
 373                 return;
 374
 375         fs_err(sdp,
 376                "fatal: assertion \"%s\" failed\n"
 377                "   function = %s, file = %s, line = %u\n",
 378                assertion, function, file, line);
 379
 380         /*
 381          * If errors=panic was specified on mount, it won't help to delay the
 382          * withdraw.
 383          */
 384         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 385                 delayed = false;
 386
 387         if (delayed)
 388                 gfs2_withdraw_delayed(sdp);
 389         else
 390                 gfs2_withdraw(sdp);
 391         dump_stack();
 392 }
 393
 394 /**
 395  * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 396  */
 397
 398 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
 399                         const char *function, char *file, unsigned int line)
 400 {
 401         if (time_before(jiffies,
 402                         sdp->sd_last_warning +
 403                         gfs2_tune_get(sdp, gt_complain_secs) * HZ))
 404                 return;
 405
 406         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
 407                 fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
 408                         assertion, function, file, line);
 409
 410         if (sdp->sd_args.ar_debug)
 411                 BUG();
 412         else
 413                 dump_stack();
 414
 415         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 416                 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
 417                       "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 418                       sdp->sd_fsname, assertion,
 419                       sdp->sd_fsname, function, file, line);
 420
 421         sdp->sd_last_warning = jiffies;
 422 }
 423
 424 /**
 425  * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 426  */
 427
 428 void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
 429                     char *file, unsigned int line)
 430 {
 431         gfs2_lm(sdp,
 432                 "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
 433                 function, file, line);
 434         gfs2_withdraw(sdp);
 435 }
 436
 437 /**
 438  * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 439  */
 440
 441 void gfs2_consist_inode_i(struct gfs2_inode *ip,
 442                           const char *function, char *file, unsigned int line)
 443 {
 444         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 445
 446         gfs2_lm(sdp,
 447                 "fatal: filesystem consistency error\n"
 448                 "  inode = %llu %llu\n"
 449                 "  function = %s, file = %s, line = %u\n",
 450                 (unsigned long long)ip->i_no_formal_ino,
 451                 (unsigned long long)ip->i_no_addr,
 452                 function, file, line);
 453         gfs2_withdraw(sdp);
 454 }
 455
 456 /**
 457  * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 458  */
 459
 460 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
 461                           const char *function, char *file, unsigned int line)
 462 {
 463         struct gfs2_sbd *sdp = rgd->rd_sbd;
 464         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 465
 466         sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
 467         gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
 468         gfs2_lm(sdp,
 469                 "fatal: filesystem consistency error\n"
 470                 "  RG = %llu\n"
 471                 "  function = %s, file = %s, line = %u\n",
 472                 (unsigned long long)rgd->rd_addr,
 473                 function, file, line);
 474         gfs2_withdraw(sdp);
 475 }
 476
 477 /**
 478  * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 479  * Returns: -1 if this call withdrew the machine,
 480  *          -2 if it was already withdrawn
 481  */
 482
 483 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 484                        const char *type, const char *function, char *file,
 485                        unsigned int line)
 486 {
 487         int me;
 488
 489         gfs2_lm(sdp,
 490                 "fatal: invalid metadata block\n"
 491                 "  bh = %llu (%s)\n"
 492                 "  function = %s, file = %s, line = %u\n",
 493                 (unsigned long long)bh->b_blocknr, type,
 494                 function, file, line);
 495         me = gfs2_withdraw(sdp);
 496         return (me) ? -1 : -2;
 497 }
 498
 499 /**
 500  * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 501  * Returns: -1 if this call withdrew the machine,
 502  *          -2 if it was already withdrawn
 503  */
 504
 505 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 506                            u16 type, u16 t, const char *function,
 507                            char *file, unsigned int line)
 508 {
 509         int me;
 510
 511         gfs2_lm(sdp,
 512                 "fatal: invalid metadata block\n"
 513                 "  bh = %llu (type: exp=%u, found=%u)\n"
 514                 "  function = %s, file = %s, line = %u\n",
 515                 (unsigned long long)bh->b_blocknr, type, t,
 516                 function, file, line);
 517         me = gfs2_withdraw(sdp);
 518         return (me) ? -1 : -2;
 519 }
 520
 521 /**
 522  * gfs2_io_error_i - Flag an I/O error and withdraw
 523  * Returns: -1 if this call withdrew the machine,
 524  *          0 if it was already withdrawn
 525  */
 526
 527 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 528                     unsigned int line)
 529 {
 530         gfs2_lm(sdp,
 531                 "fatal: I/O error\n"
 532                 "  function = %s, file = %s, line = %u\n",
 533                 function, file, line);
 534         return gfs2_withdraw(sdp);
 535 }
 536
 537 /**
 538  * gfs2_io_error_bh_i - Flag a buffer I/O error
 539  * @withdraw: withdraw the filesystem
 540  */
 541
 542 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 543                         const char *function, char *file, unsigned int line,
 544                         bool withdraw)
 545 {
 546         if (gfs2_withdrawn(sdp))
 547                 return;
 548
 549         fs_err(sdp, "fatal: I/O error\n"
 550                "  block = %llu\n"
 551                "  function = %s, file = %s, line = %u\n",
 552                (unsigned long long)bh->b_blocknr, function, file, line);
 553         if (withdraw)
 554                 gfs2_withdraw(sdp);
 555 }
 556