fs/gfs2/util.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   5  */
   6
   7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9 #include <linux/spinlock.h>
  10 #include <linux/completion.h>
  11 #include <linux/buffer_head.h>
  12 #include <linux/crc32.h>
  13 #include <linux/gfs2_ondisk.h>
  14 #include <linux/delay.h>
  15 #include <linux/uaccess.h>
  16
  17 #include "gfs2.h"
  18 #include "incore.h"
  19 #include "glock.h"
  20 #include "glops.h"
  21 #include "log.h"
  22 #include "lops.h"
  23 #include "recovery.h"
  24 #include "rgrp.h"
  25 #include "super.h"
  26 #include "util.h"
  27
/*
 * File-scope definitions of the GFS2 slab cache pointers and the page
 * mempool pointer.  They are only defined here; nothing in the code visible
 * in this file assigns them, so they are presumably set up elsewhere.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
struct kmem_cache *gfs2_quotad_cachep __read_mostly;
struct kmem_cache *gfs2_qadata_cachep __read_mostly;
struct kmem_cache *gfs2_trans_cachep __read_mostly;
mempool_t *gfs2_page_pool __read_mostly;
  37
  38 void gfs2_assert_i(struct gfs2_sbd *sdp)
  39 {
  40         fs_emerg(sdp, "fatal assertion failed\n");
  41 }
  42
  43 /**
  44  * check_journal_clean - Make sure a journal is clean for a spectator mount
  45  * @sdp: The GFS2 superblock
  46  * @jd: The journal descriptor
  47  *
  48  * Returns: 0 if the journal is clean or locked, else an error
  49  */
  50 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
  51                         bool verbose)
  52 {
  53         int error;
  54         struct gfs2_holder j_gh;
  55         struct gfs2_log_header_host head;
  56         struct gfs2_inode *ip;
  57
  58         ip = GFS2_I(jd->jd_inode);
  59         error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
  60                                    GL_EXACT | GL_NOCACHE, &j_gh);
  61         if (error) {
  62                 if (verbose)
  63                         fs_err(sdp, "Error %d locking journal for spectator "
  64                                "mount.\n", error);
  65                 return -EPERM;
  66         }
  67         error = gfs2_jdesc_check(jd);
  68         if (error) {
  69                 if (verbose)
  70                         fs_err(sdp, "Error checking journal for spectator "
  71                                "mount.\n");
  72                 goto out_unlock;
  73         }
  74         error = gfs2_find_jhead(jd, &head, false);
  75         if (error) {
  76                 if (verbose)
  77                         fs_err(sdp, "Error parsing journal for spectator "
  78                                "mount.\n");
  79                 goto out_unlock;
  80         }
  81         if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
  82                 error = -EPERM;
  83                 if (verbose)
  84                         fs_err(sdp, "jid=%u: Journal is dirty, so the first "
  85                                "mounter must not be a spectator.\n",
  86                                jd->jd_jid);
  87         }
  88
  89 out_unlock:
  90         gfs2_glock_dq_uninit(&j_gh);
  91         return error;
  92 }
  93
  94 /**
  95  * gfs2_freeze_lock - hold the freeze glock
  96  * @sdp: the superblock
  97  * @freeze_gh: pointer to the requested holder
  98  * @caller_flags: any additional flags needed by the caller
  99  */
 100 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
 101                      int caller_flags)
 102 {
 103         int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
 104         int error;
 105
 106         error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
 107                                    freeze_gh);
 108         if (error && error != GLR_TRYFAILED)
 109                 fs_err(sdp, "can't lock the freeze lock: %d\n", error);
 110         return error;
 111 }
 112
 113 void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
 114 {
 115         if (gfs2_holder_initialized(freeze_gh))
 116                 gfs2_glock_dq_uninit(freeze_gh);
 117 }
 118
 119 static void signal_our_withdraw(struct gfs2_sbd *sdp)
 120 {
 121         struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
 122         struct inode *inode;
 123         struct gfs2_inode *ip;
 124         struct gfs2_glock *i_gl;
 125         u64 no_formal_ino;
 126         int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 127         int ret = 0;
 128         int tries;
 129
 130         if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
 131                 return;
 132
 133         inode = sdp->sd_jdesc->jd_inode;
 134         ip = GFS2_I(inode);
 135         i_gl = ip->i_gl;
 136         no_formal_ino = ip->i_no_formal_ino;
 137
 138         /* Prevent any glock dq until withdraw recovery is complete */
 139         set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 140         /*
 141          * Don't tell dlm we're bailing until we have no more buffers in the
 142          * wind. If journal had an IO error, the log code should just purge
 143          * the outstanding buffers rather than submitting new IO. Making the
 144          * file system read-only will flush the journal, etc.
 145          *
 146          * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
 147          * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
 148          * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
 149          * therefore we need to clear SDF_JOURNAL_LIVE manually.
 150          */
 151         clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 152         if (!sb_rdonly(sdp->sd_vfs)) {
 153                 struct gfs2_holder freeze_gh;
 154
 155                 gfs2_holder_mark_uninitialized(&freeze_gh);
 156                 if (sdp->sd_freeze_gl &&
 157                     !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
 158                         ret = gfs2_freeze_lock(sdp, &freeze_gh,
 159                                        log_write_allowed ? 0 : LM_FLAG_TRY);
 160                         if (ret == GLR_TRYFAILED)
 161                                 ret = 0;
 162                 }
 163                 if (!ret)
 164                         ret = gfs2_make_fs_ro(sdp);
 165                 gfs2_freeze_unlock(&freeze_gh);
 166         }
 167
 168         if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
 169                 if (!ret)
 170                         ret = -EIO;
 171                 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 172                 goto skip_recovery;
 173         }
 174         /*
 175          * Drop the glock for our journal so another node can recover it.
 176          */
 177         if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
 178                 gfs2_glock_dq_wait(&sdp->sd_journal_gh);
 179                 gfs2_holder_uninit(&sdp->sd_journal_gh);
 180         }
 181         sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
 182         gfs2_glock_dq(&sdp->sd_jinode_gh);
 183         if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
 184                 /* Make sure gfs2_unfreeze works if partially-frozen */
 185                 flush_workqueue(gfs2_freeze_wq);
 186                 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 187                 thaw_super(sdp->sd_vfs);
 188         } else {
 189                 wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
 190                             TASK_UNINTERRUPTIBLE);
 191         }
 192
 193         /*
 194          * holder_uninit to force glock_put, to force dlm to let go
 195          */
 196         gfs2_holder_uninit(&sdp->sd_jinode_gh);
 197
 198         /*
 199          * Note: We need to be careful here:
 200          * Our iput of jd_inode will evict it. The evict will dequeue its
 201          * glock, but the glock dq will wait for the withdraw unless we have
 202          * exception code in glock_dq.
 203          */
 204         iput(inode);
 205         /*
 206          * Wait until the journal inode's glock is freed. This allows try locks
 207          * on other nodes to be successful, otherwise we remain the owner of
 208          * the glock as far as dlm is concerned.
 209          */
 210         if (i_gl->gl_ops->go_free) {
 211                 set_bit(GLF_FREEING, &i_gl->gl_flags);
 212                 wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
 213         }
 214
 215         /*
 216          * Dequeue the "live" glock, but keep a reference so it's never freed.
 217          */
 218         gfs2_glock_hold(live_gl);
 219         gfs2_glock_dq_wait(&sdp->sd_live_gh);
 220         /*
 221          * We enqueue the "live" glock in EX so that all other nodes
 222          * get a demote request and act on it. We don't really want the
 223          * lock in EX, so we send a "try" lock with 1CB to produce a callback.
 224          */
 225         fs_warn(sdp, "Requesting recovery of jid %d.\n",
 226                 sdp->sd_lockstruct.ls_jid);
 227         gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
 228                            &sdp->sd_live_gh);
 229         msleep(GL_GLOCK_MAX_HOLD);
 230         /*
 231          * This will likely fail in a cluster, but succeed standalone:
 232          */
 233         ret = gfs2_glock_nq(&sdp->sd_live_gh);
 234
 235         /*
 236          * If we actually got the "live" lock in EX mode, there are no other
 237          * nodes available to replay our journal. So we try to replay it
 238          * ourselves. We hold the "live" glock to prevent other mounters
 239          * during recovery, then just dequeue it and reacquire it in our
 240          * normal SH mode. Just in case the problem that caused us to
 241          * withdraw prevents us from recovering our journal (e.g. io errors
 242          * and such) we still check if the journal is clean before proceeding
 243          * but we may wait forever until another mounter does the recovery.
 244          */
 245         if (ret == 0) {
 246                 fs_warn(sdp, "No other mounters found. Trying to recover our "
 247                         "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
 248                 if (gfs2_recover_journal(sdp->sd_jdesc, 1))
 249                         fs_warn(sdp, "Unable to recover our journal jid %d.\n",
 250                                 sdp->sd_lockstruct.ls_jid);
 251                 gfs2_glock_dq_wait(&sdp->sd_live_gh);
 252                 gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
 253                                    &sdp->sd_live_gh);
 254                 gfs2_glock_nq(&sdp->sd_live_gh);
 255         }
 256
 257         gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
 258         clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 259
 260         /*
 261          * At this point our journal is evicted, so we need to get a new inode
 262          * for it. Once done, we need to call gfs2_find_jhead which
 263          * calls gfs2_map_journal_extents to map it for us again.
 264          *
 265          * Note that we don't really want it to look up a FREE block. The
 266          * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
 267          * which would otherwise fail because it requires grabbing an rgrp
 268          * glock, which would fail with -EIO because we're withdrawing.
 269          */
 270         inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
 271                                   sdp->sd_jdesc->jd_no_addr, no_formal_ino,
 272                                   GFS2_BLKST_FREE);
 273         if (IS_ERR(inode)) {
 274                 fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
 275                         sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
 276                 goto skip_recovery;
 277         }
 278         sdp->sd_jdesc->jd_inode = inode;
 279
 280         /*
 281          * Now wait until recovery is complete.
 282          */
 283         for (tries = 0; tries < 10; tries++) {
 284                 ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
 285                 if (!ret)
 286                         break;
 287                 msleep(HZ);
 288                 fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
 289                         sdp->sd_lockstruct.ls_jid);
 290         }
 291 skip_recovery:
 292         if (!ret)
 293                 fs_warn(sdp, "Journal recovery complete for jid %d.\n",
 294                         sdp->sd_lockstruct.ls_jid);
 295         else
 296                 fs_warn(sdp, "Journal recovery skipped for %d until next "
 297                         "mount.\n", sdp->sd_lockstruct.ls_jid);
 298         fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
 299         sdp->sd_glock_dqs_held = 0;
 300         wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
 301 }
 302
 303 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
 304 {
 305         struct va_format vaf;
 306         va_list args;
 307
 308         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 309             test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
 310                 return;
 311
 312         va_start(args, fmt);
 313         vaf.fmt = fmt;
 314         vaf.va = &args;
 315         fs_err(sdp, "%pV", &vaf);
 316         va_end(args);
 317 }
 318
 319 int gfs2_withdraw(struct gfs2_sbd *sdp)
 320 {
 321         struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 322         const struct lm_lockops *lm = ls->ls_ops;
 323
 324         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
 325             test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
 326                 if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
 327                         return -1;
 328
 329                 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
 330                             TASK_UNINTERRUPTIBLE);
 331                 return -1;
 332         }
 333
 334         set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 335
 336         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
 337                 fs_err(sdp, "about to withdraw this file system\n");
 338                 BUG_ON(sdp->sd_args.ar_debug);
 339
 340                 signal_our_withdraw(sdp);
 341
 342                 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 343
 344                 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
 345                         wait_for_completion(&sdp->sd_wdack);
 346
 347                 if (lm->lm_unmount) {
 348                         fs_err(sdp, "telling LM to unmount\n");
 349                         lm->lm_unmount(sdp);
 350                 }
 351                 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 352                 fs_err(sdp, "File system withdrawn\n");
 353                 dump_stack();
 354                 clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
 355                 smp_mb__after_atomic();
 356                 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
 357         }
 358
 359         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 360                 panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
 361
 362         return -1;
 363 }
 364
 365 /**
 366  * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 367  */
 368
 369 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
 370                             const char *function, char *file, unsigned int line,
 371                             bool delayed)
 372 {
 373         if (gfs2_withdrawn(sdp))
 374                 return;
 375
 376         fs_err(sdp,
 377                "fatal: assertion \"%s\" failed\n"
 378                "   function = %s, file = %s, line = %u\n",
 379                assertion, function, file, line);
 380
 381         /*
 382          * If errors=panic was specified on mount, it won't help to delay the
 383          * withdraw.
 384          */
 385         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 386                 delayed = false;
 387
 388         if (delayed)
 389                 gfs2_withdraw_delayed(sdp);
 390         else
 391                 gfs2_withdraw(sdp);
 392         dump_stack();
 393 }
 394
 395 /**
 396  * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 397  */
 398
 399 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
 400                         const char *function, char *file, unsigned int line)
 401 {
 402         if (time_before(jiffies,
 403                         sdp->sd_last_warning +
 404                         gfs2_tune_get(sdp, gt_complain_secs) * HZ))
 405                 return;
 406
 407         if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
 408                 fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
 409                         assertion, function, file, line);
 410
 411         if (sdp->sd_args.ar_debug)
 412                 BUG();
 413         else
 414                 dump_stack();
 415
 416         if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
 417                 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
 418                       "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
 419                       sdp->sd_fsname, assertion,
 420                       sdp->sd_fsname, function, file, line);
 421
 422         sdp->sd_last_warning = jiffies;
 423 }
 424
 425 /**
 426  * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 427  */
 428
 429 void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
 430                     char *file, unsigned int line)
 431 {
 432         gfs2_lm(sdp,
 433                 "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
 434                 function, file, line);
 435         gfs2_withdraw(sdp);
 436 }
 437
 438 /**
 439  * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 440  */
 441
 442 void gfs2_consist_inode_i(struct gfs2_inode *ip,
 443                           const char *function, char *file, unsigned int line)
 444 {
 445         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 446
 447         gfs2_lm(sdp,
 448                 "fatal: filesystem consistency error\n"
 449                 "  inode = %llu %llu\n"
 450                 "  function = %s, file = %s, line = %u\n",
 451                 (unsigned long long)ip->i_no_formal_ino,
 452                 (unsigned long long)ip->i_no_addr,
 453                 function, file, line);
 454         gfs2_withdraw(sdp);
 455 }
 456
 457 /**
 458  * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 459  */
 460
 461 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
 462                           const char *function, char *file, unsigned int line)
 463 {
 464         struct gfs2_sbd *sdp = rgd->rd_sbd;
 465         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
 466
 467         sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
 468         gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
 469         gfs2_lm(sdp,
 470                 "fatal: filesystem consistency error\n"
 471                 "  RG = %llu\n"
 472                 "  function = %s, file = %s, line = %u\n",
 473                 (unsigned long long)rgd->rd_addr,
 474                 function, file, line);
 475         gfs2_withdraw(sdp);
 476 }
 477
 478 /**
 479  * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 480  * Returns: -1 if this call withdrew the machine,
 481  *          -2 if it was already withdrawn
 482  */
 483
 484 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 485                        const char *type, const char *function, char *file,
 486                        unsigned int line)
 487 {
 488         int me;
 489
 490         gfs2_lm(sdp,
 491                 "fatal: invalid metadata block\n"
 492                 "  bh = %llu (%s)\n"
 493                 "  function = %s, file = %s, line = %u\n",
 494                 (unsigned long long)bh->b_blocknr, type,
 495                 function, file, line);
 496         me = gfs2_withdraw(sdp);
 497         return (me) ? -1 : -2;
 498 }
 499
 500 /**
 501  * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 502  * Returns: -1 if this call withdrew the machine,
 503  *          -2 if it was already withdrawn
 504  */
 505
 506 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
 507                            u16 type, u16 t, const char *function,
 508                            char *file, unsigned int line)
 509 {
 510         int me;
 511
 512         gfs2_lm(sdp,
 513                 "fatal: invalid metadata block\n"
 514                 "  bh = %llu (type: exp=%u, found=%u)\n"
 515                 "  function = %s, file = %s, line = %u\n",
 516                 (unsigned long long)bh->b_blocknr, type, t,
 517                 function, file, line);
 518         me = gfs2_withdraw(sdp);
 519         return (me) ? -1 : -2;
 520 }
 521
 522 /**
 523  * gfs2_io_error_i - Flag an I/O error and withdraw
 524  * Returns: -1 if this call withdrew the machine,
 525  *          0 if it was already withdrawn
 526  */
 527
 528 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
 529                     unsigned int line)
 530 {
 531         gfs2_lm(sdp,
 532                 "fatal: I/O error\n"
 533                 "  function = %s, file = %s, line = %u\n",
 534                 function, file, line);
 535         return gfs2_withdraw(sdp);
 536 }
 537
 538 /**
 539  * gfs2_io_error_bh_i - Flag a buffer I/O error
 540  * @withdraw: withdraw the filesystem
 541  */
 542
 543 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 544                         const char *function, char *file, unsigned int line,
 545                         bool withdraw)
 546 {
 547         if (gfs2_withdrawn(sdp))
 548                 return;
 549
 550         fs_err(sdp, "fatal: I/O error\n"
 551                "  block = %llu\n"
 552                "  function = %s, file = %s, line = %u\n",
 553                (unsigned long long)bh->b_blocknr, function, file, line);
 554         if (withdraw)
 555                 gfs2_withdraw(sdp);
 556 }
 557