fs/jbd2/checkpoint.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * linux/fs/jbd2/checkpoint.c
   4  *
   5  * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
   6  *
   7  * Copyright 1999 Red Hat Software --- All Rights Reserved
   8  *
   9  * Checkpoint routines for the generic filesystem journaling code.
  10  * Part of the ext2fs journaling system.
  11  *
  12  * Checkpointing is the process of ensuring that a section of the log is
  13  * committed fully to disk, so that that portion of the log can be
  14  * reused.
  15  */
  16
  17 #include <linux/time.h>
  18 #include <linux/fs.h>
  19 #include <linux/jbd2.h>
  20 #include <linux/errno.h>
  21 #include <linux/slab.h>
  22 #include <linux/blkdev.h>
  23 #include <trace/events/jbd2.h>
  24
  25 /*
  26  * Unlink a buffer from a transaction checkpoint list.
  27  *
  28  * Called with j_list_lock held.
  29  */
  30 static inline void __buffer_unlink_first(struct journal_head *jh)
  31 {
  32         transaction_t *transaction = jh->b_cp_transaction;
  33
  34         jh->b_cpnext->b_cpprev = jh->b_cpprev;
  35         jh->b_cpprev->b_cpnext = jh->b_cpnext;
  36         if (transaction->t_checkpoint_list == jh) {
  37                 transaction->t_checkpoint_list = jh->b_cpnext;
  38                 if (transaction->t_checkpoint_list == jh)
  39                         transaction->t_checkpoint_list = NULL;
  40         }
  41 }
  42
  43 /*
  44  * Unlink a buffer from a transaction checkpoint(io) list.
  45  *
  46  * Called with j_list_lock held.
  47  */
  48 static inline void __buffer_unlink(struct journal_head *jh)
  49 {
  50         transaction_t *transaction = jh->b_cp_transaction;
  51
  52         __buffer_unlink_first(jh);
  53         if (transaction->t_checkpoint_io_list == jh) {
  54                 transaction->t_checkpoint_io_list = jh->b_cpnext;
  55                 if (transaction->t_checkpoint_io_list == jh)
  56                         transaction->t_checkpoint_io_list = NULL;
  57         }
  58 }
  59
  60 /*
  61  * Try to release a checkpointed buffer from its transaction.
  62  * Returns 1 if we released it and 2 if we also released the
  63  * whole transaction.
  64  *
  65  * Requires j_list_lock
  66  */
  67 static int __try_to_free_cp_buf(struct journal_head *jh)
  68 {
  69         int ret = 0;
  70         struct buffer_head *bh = jh2bh(jh);
  71
  72         if (!jh->b_transaction && !buffer_locked(bh) && !buffer_dirty(bh)) {
  73                 JBUFFER_TRACE(jh, "remove from checkpoint list");
  74                 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
  75         }
  76         return ret;
  77 }
  78
  79 /*
  80  * __jbd2_log_wait_for_space: wait until there is space in the journal.
  81  *
  82  * Called under j-state_lock *only*.  It will be unlocked if we have to wait
  83  * for a checkpoint to free up some space in the log.
  84  */
  85 void __jbd2_log_wait_for_space(journal_t *journal)
  86 __acquires(&journal->j_state_lock)
  87 __releases(&journal->j_state_lock)
  88 {
  89         int nblocks, space_left;
  90         /* assert_spin_locked(&journal->j_state_lock); */
  91
  92         nblocks = jbd2_space_needed(journal);
  93         while (jbd2_log_space_left(journal) < nblocks) {
  94                 write_unlock(&journal->j_state_lock);
  95                 mutex_lock_io(&journal->j_checkpoint_mutex);
  96
  97                 /*
  98                  * Test again, another process may have checkpointed while we
  99                  * were waiting for the checkpoint lock. If there are no
 100                  * transactions ready to be checkpointed, try to recover
 101                  * journal space by calling cleanup_journal_tail(), and if
 102                  * that doesn't work, by waiting for the currently committing
 103                  * transaction to complete.  If there is absolutely no way
 104                  * to make progress, this is either a BUG or corrupted
 105                  * filesystem, so abort the journal and leave a stack
 106                  * trace for forensic evidence.
 107                  */
 108                 write_lock(&journal->j_state_lock);
 109                 if (journal->j_flags & JBD2_ABORT) {
 110                         mutex_unlock(&journal->j_checkpoint_mutex);
 111                         return;
 112                 }
 113                 spin_lock(&journal->j_list_lock);
 114                 space_left = jbd2_log_space_left(journal);
 115                 if (space_left < nblocks) {
 116                         int chkpt = journal->j_checkpoint_transactions != NULL;
 117                         tid_t tid = 0;
 118
 119                         if (journal->j_committing_transaction)
 120                                 tid = journal->j_committing_transaction->t_tid;
 121                         spin_unlock(&journal->j_list_lock);
 122                         write_unlock(&journal->j_state_lock);
 123                         if (chkpt) {
 124                                 jbd2_log_do_checkpoint(journal);
 125                         } else if (jbd2_cleanup_journal_tail(journal) == 0) {
 126                                 /* We were able to recover space; yay! */
 127                                 ;
 128                         } else if (tid) {
 129                                 /*
 130                                  * jbd2_journal_commit_transaction() may want
 131                                  * to take the checkpoint_mutex if JBD2_FLUSHED
 132                                  * is set.  So we need to temporarily drop it.
 133                                  */
 134                                 mutex_unlock(&journal->j_checkpoint_mutex);
 135                                 jbd2_log_wait_commit(journal, tid);
 136                                 write_lock(&journal->j_state_lock);
 137                                 continue;
 138                         } else {
 139                                 printk(KERN_ERR "%s: needed %d blocks and "
 140                                        "only had %d space available\n",
 141                                        __func__, nblocks, space_left);
 142                                 printk(KERN_ERR "%s: no way to get more "
 143                                        "journal space in %s\n", __func__,
 144                                        journal->j_devname);
 145                                 WARN_ON(1);
 146                                 jbd2_journal_abort(journal, -EIO);
 147                         }
 148                         write_lock(&journal->j_state_lock);
 149                 } else {
 150                         spin_unlock(&journal->j_list_lock);
 151                 }
 152                 mutex_unlock(&journal->j_checkpoint_mutex);
 153         }
 154 }
 155
 156 static void
 157 __flush_batch(journal_t *journal, int *batch_count)
 158 {
 159         int i;
 160         struct blk_plug plug;
 161
 162         blk_start_plug(&plug);
 163         for (i = 0; i < *batch_count; i++)
 164                 write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC);
 165         blk_finish_plug(&plug);
 166
 167         for (i = 0; i < *batch_count; i++) {
 168                 struct buffer_head *bh = journal->j_chkpt_bhs[i];
 169                 BUFFER_TRACE(bh, "brelse");
 170                 __brelse(bh);
 171                 journal->j_chkpt_bhs[i] = NULL;
 172         }
 173         *batch_count = 0;
 174 }
 175
 176 /*
 177  * Perform an actual checkpoint. We take the first transaction on the
 178  * list of transactions to be checkpointed and send all its buffers
 179  * to disk. We submit larger chunks of data at once.
 180  *
 181  * The journal should be locked before calling this function.
 182  * Called with j_checkpoint_mutex held.
 183  */
 184 int jbd2_log_do_checkpoint(journal_t *journal)
 185 {
 186         struct journal_head     *jh;
 187         struct buffer_head      *bh;
 188         transaction_t           *transaction;
 189         tid_t                   this_tid;
 190         int                     result, batch_count = 0;
 191
 192         jbd_debug(1, "Start checkpoint\n");
 193
 194         /*
 195          * First thing: if there are any transactions in the log which
 196          * don't need checkpointing, just eliminate them from the
 197          * journal straight away.
 198          */
 199         result = jbd2_cleanup_journal_tail(journal);
 200         trace_jbd2_checkpoint(journal, result);
 201         jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
 202         if (result <= 0)
 203                 return result;
 204
 205         /*
 206          * OK, we need to start writing disk blocks.  Take one transaction
 207          * and write it.
 208          */
 209         spin_lock(&journal->j_list_lock);
 210         if (!journal->j_checkpoint_transactions)
 211                 goto out;
 212         transaction = journal->j_checkpoint_transactions;
 213         if (transaction->t_chp_stats.cs_chp_time == 0)
 214                 transaction->t_chp_stats.cs_chp_time = jiffies;
 215         this_tid = transaction->t_tid;
 216 restart:
 217         /*
 218          * If someone cleaned up this transaction while we slept, we're
 219          * done (maybe it's a new transaction, but it fell at the same
 220          * address).
 221          */
 222         if (journal->j_checkpoint_transactions != transaction ||
 223             transaction->t_tid != this_tid)
 224                 goto out;
 225
 226         /* checkpoint all of the transaction's buffers */
 227         while (transaction->t_checkpoint_list) {
 228                 jh = transaction->t_checkpoint_list;
 229                 bh = jh2bh(jh);
 230
 231                 if (jh->b_transaction != NULL) {
 232                         transaction_t *t = jh->b_transaction;
 233                         tid_t tid = t->t_tid;
 234
 235                         transaction->t_chp_stats.cs_forced_to_close++;
 236                         spin_unlock(&journal->j_list_lock);
 237                         if (unlikely(journal->j_flags & JBD2_UNMOUNT))
 238                                 /*
 239                                  * The journal thread is dead; so
 240                                  * starting and waiting for a commit
 241                                  * to finish will cause us to wait for
 242                                  * a _very_ long time.
 243                                  */
 244                                 printk(KERN_ERR
 245                 "JBD2: %s: Waiting for Godot: block %llu\n",
 246                 journal->j_devname, (unsigned long long) bh->b_blocknr);
 247
 248                         if (batch_count)
 249                                 __flush_batch(journal, &batch_count);
 250                         jbd2_log_start_commit(journal, tid);
 251                         /*
 252                          * jbd2_journal_commit_transaction() may want
 253                          * to take the checkpoint_mutex if JBD2_FLUSHED
 254                          * is set, jbd2_update_log_tail() called by
 255                          * jbd2_journal_commit_transaction() may also take
 256                          * checkpoint_mutex.  So we need to temporarily
 257                          * drop it.
 258                          */
 259                         mutex_unlock(&journal->j_checkpoint_mutex);
 260                         jbd2_log_wait_commit(journal, tid);
 261                         mutex_lock_io(&journal->j_checkpoint_mutex);
 262                         spin_lock(&journal->j_list_lock);
 263                         goto restart;
 264                 }
 265                 if (!trylock_buffer(bh)) {
 266                         /*
 267                          * The buffer is locked, it may be writing back, or
 268                          * flushing out in the last couple of cycles, or
 269                          * re-adding into a new transaction, need to check
 270                          * it again until it's unlocked.
 271                          */
 272                         get_bh(bh);
 273                         spin_unlock(&journal->j_list_lock);
 274                         wait_on_buffer(bh);
 275                         /* the journal_head may have gone by now */
 276                         BUFFER_TRACE(bh, "brelse");
 277                         __brelse(bh);
 278                         goto retry;
 279                 } else if (!buffer_dirty(bh)) {
 280                         unlock_buffer(bh);
 281                         BUFFER_TRACE(bh, "remove from checkpoint");
 282                         /*
 283                          * If the transaction was released or the checkpoint
 284                          * list was empty, we're done.
 285                          */
 286                         if (__jbd2_journal_remove_checkpoint(jh) ||
 287                             !transaction->t_checkpoint_list)
 288                                 goto out;
 289                 } else {
 290                         unlock_buffer(bh);
 291                         /*
 292                          * We are about to write the buffer, it could be
 293                          * raced by some other transaction shrink or buffer
 294                          * re-log logic once we release the j_list_lock,
 295                          * leave it on the checkpoint list and check status
 296                          * again to make sure it's clean.
 297                          */
 298                         BUFFER_TRACE(bh, "queue");
 299                         get_bh(bh);
 300                         J_ASSERT_BH(bh, !buffer_jwrite(bh));
 301                         journal->j_chkpt_bhs[batch_count++] = bh;
 302                         transaction->t_chp_stats.cs_written++;
 303                         transaction->t_checkpoint_list = jh->b_cpnext;
 304                 }
 305
 306                 if ((batch_count == JBD2_NR_BATCH) ||
 307                     need_resched() || spin_needbreak(&journal->j_list_lock) ||
 308                     jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
 309                         goto unlock_and_flush;
 310         }
 311
 312         if (batch_count) {
 313                 unlock_and_flush:
 314                         spin_unlock(&journal->j_list_lock);
 315                 retry:
 316                         if (batch_count)
 317                                 __flush_batch(journal, &batch_count);
 318                         spin_lock(&journal->j_list_lock);
 319                         goto restart;
 320         }
 321
 322 out:
 323         spin_unlock(&journal->j_list_lock);
 324         result = jbd2_cleanup_journal_tail(journal);
 325
 326         return (result < 0) ? result : 0;
 327 }
 328
 329 /*
 330  * Check the list of checkpoint transactions for the journal to see if
 331  * we have already got rid of any since the last update of the log tail
 332  * in the journal superblock.  If so, we can instantly roll the
 333  * superblock forward to remove those transactions from the log.
 334  *
 335  * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 336  *
 337  * Called with the journal lock held.
 338  *
 339  * This is the only part of the journaling code which really needs to be
 340  * aware of transaction aborts.  Checkpointing involves writing to the
 341  * main filesystem area rather than to the journal, so it can proceed
 342  * even in abort state, but we must not update the super block if
 343  * checkpointing may have failed.  Otherwise, we would lose some metadata
 344  * buffers which should be written-back to the filesystem.
 345  */
 346
 347 int jbd2_cleanup_journal_tail(journal_t *journal)
 348 {
 349         tid_t           first_tid;
 350         unsigned long   blocknr;
 351
 352         if (is_journal_aborted(journal))
 353                 return -EIO;
 354
 355         if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
 356                 return 1;
 357         J_ASSERT(blocknr != 0);
 358
 359         /*
 360          * We need to make sure that any blocks that were recently written out
 361          * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
 362          * we drop the transactions from the journal. It's unlikely this will
 363          * be necessary, especially with an appropriately sized journal, but we
 364          * need this to guarantee correctness.  Fortunately
 365          * jbd2_cleanup_journal_tail() doesn't get called all that often.
 366          */
 367         if (journal->j_flags & JBD2_BARRIER)
 368                 blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
 369
 370         return __jbd2_update_log_tail(journal, first_tid, blocknr);
 371 }
 372
 373
 374 /* Checkpoint list management */
 375
 376 /*
 377  * journal_clean_one_cp_list
 378  *
 379  * Find all the written-back checkpoint buffers in the given list and
 380  * release them. If 'destroy' is set, clean all buffers unconditionally.
 381  *
 382  * Called with j_list_lock held.
 383  * Returns 1 if we freed the transaction, 0 otherwise.
 384  */
 385 static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 386 {
 387         struct journal_head *last_jh;
 388         struct journal_head *next_jh = jh;
 389         int ret;
 390
 391         if (!jh)
 392                 return 0;
 393
 394         last_jh = jh->b_cpprev;
 395         do {
 396                 jh = next_jh;
 397                 next_jh = jh->b_cpnext;
 398                 if (!destroy)
 399                         ret = __try_to_free_cp_buf(jh);
 400                 else
 401                         ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 402                 if (!ret)
 403                         return 0;
 404                 if (ret == 2)
 405                         return 1;
 406                 /*
 407                  * This function only frees up some memory
 408                  * if possible so we dont have an obligation
 409                  * to finish processing. Bail out if preemption
 410                  * requested:
 411                  */
 412                 if (need_resched())
 413                         return 0;
 414         } while (jh != last_jh);
 415
 416         return 0;
 417 }
 418
 419 /*
 420  * journal_clean_checkpoint_list
 421  *
 422  * Find all the written-back checkpoint buffers in the journal and release them.
 423  * If 'destroy' is set, release all buffers unconditionally.
 424  *
 425  * Called with j_list_lock held.
 426  */
 427 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 428 {
 429         transaction_t *transaction, *last_transaction, *next_transaction;
 430         int ret;
 431
 432         transaction = journal->j_checkpoint_transactions;
 433         if (!transaction)
 434                 return;
 435
 436         last_transaction = transaction->t_cpprev;
 437         next_transaction = transaction;
 438         do {
 439                 transaction = next_transaction;
 440                 next_transaction = transaction->t_cpnext;
 441                 ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
 442                                                 destroy);
 443                 /*
 444                  * This function only frees up some memory if possible so we
 445                  * dont have an obligation to finish processing. Bail out if
 446                  * preemption requested:
 447                  */
 448                 if (need_resched())
 449                         return;
 450                 if (ret)
 451                         continue;
 452                 /*
 453                  * It is essential that we are as careful as in the case of
 454                  * t_checkpoint_list with removing the buffer from the list as
 455                  * we can possibly see not yet submitted buffers on io_list
 456                  */
 457                 ret = journal_clean_one_cp_list(transaction->
 458                                 t_checkpoint_io_list, destroy);
 459                 if (need_resched())
 460                         return;
 461                 /*
 462                  * Stop scanning if we couldn't free the transaction. This
 463                  * avoids pointless scanning of transactions which still
 464                  * weren't checkpointed.
 465                  */
 466                 if (!ret)
 467                         return;
 468         } while (transaction != last_transaction);
 469 }
 470
 471 /*
 472  * Remove buffers from all checkpoint lists as journal is aborted and we just
 473  * need to free memory
 474  */
 475 void jbd2_journal_destroy_checkpoint(journal_t *journal)
 476 {
 477         /*
 478          * We loop because __jbd2_journal_clean_checkpoint_list() may abort
 479          * early due to a need of rescheduling.
 480          */
 481         while (1) {
 482                 spin_lock(&journal->j_list_lock);
 483                 if (!journal->j_checkpoint_transactions) {
 484                         spin_unlock(&journal->j_list_lock);
 485                         break;
 486                 }
 487                 __jbd2_journal_clean_checkpoint_list(journal, true);
 488                 spin_unlock(&journal->j_list_lock);
 489                 cond_resched();
 490         }
 491 }
 492
 493 /*
 494  * journal_remove_checkpoint: called after a buffer has been committed
 495  * to disk (either by being write-back flushed to disk, or being
 496  * committed to the log).
 497  *
 498  * We cannot safely clean a transaction out of the log until all of the
 499  * buffer updates committed in that transaction have safely been stored
 500  * elsewhere on disk.  To achieve this, all of the buffers in a
 501  * transaction need to be maintained on the transaction's checkpoint
 502  * lists until they have been rewritten, at which point this function is
 503  * called to remove the buffer from the existing transaction's
 504  * checkpoint lists.
 505  *
 506  * The function returns 1 if it frees the transaction, 0 otherwise.
 507  * The function can free jh and bh.
 508  *
 509  * This function is called with j_list_lock held.
 510  */
 511 int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 512 {
 513         struct transaction_chp_stats_s *stats;
 514         transaction_t *transaction;
 515         journal_t *journal;
 516         int ret = 0;
 517
 518         JBUFFER_TRACE(jh, "entry");
 519
 520         if ((transaction = jh->b_cp_transaction) == NULL) {
 521                 JBUFFER_TRACE(jh, "not on transaction");
 522                 goto out;
 523         }
 524         journal = transaction->t_journal;
 525
 526         JBUFFER_TRACE(jh, "removing from transaction");
 527         __buffer_unlink(jh);
 528         jh->b_cp_transaction = NULL;
 529         jbd2_journal_put_journal_head(jh);
 530
 531         if (transaction->t_checkpoint_list != NULL ||
 532             transaction->t_checkpoint_io_list != NULL)
 533                 goto out;
 534
 535         /*
 536          * There is one special case to worry about: if we have just pulled the
 537          * buffer off a running or committing transaction's checkpoing list,
 538          * then even if the checkpoint list is empty, the transaction obviously
 539          * cannot be dropped!
 540          *
 541          * The locking here around t_state is a bit sleazy.
 542          * See the comment at the end of jbd2_journal_commit_transaction().
 543          */
 544         if (transaction->t_state != T_FINISHED)
 545                 goto out;
 546
 547         /* OK, that was the last buffer for the transaction: we can now
 548            safely remove this transaction from the log */
 549         stats = &transaction->t_chp_stats;
 550         if (stats->cs_chp_time)
 551                 stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time,
 552                                                     jiffies);
 553         trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev,
 554                                     transaction->t_tid, stats);
 555
 556         __jbd2_journal_drop_transaction(journal, transaction);
 557         jbd2_journal_free_transaction(transaction);
 558         ret = 1;
 559 out:
 560         return ret;
 561 }
 562
 563 /*
 564  * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 565  * list so that we know when it is safe to clean the transaction out of
 566  * the log.
 567  *
 568  * Called with the journal locked.
 569  * Called with j_list_lock held.
 570  */
 571 void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
 572                                transaction_t *transaction)
 573 {
 574         JBUFFER_TRACE(jh, "entry");
 575         J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
 576         J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 577
 578         /* Get reference for checkpointing transaction */
 579         jbd2_journal_grab_journal_head(jh2bh(jh));
 580         jh->b_cp_transaction = transaction;
 581
 582         if (!transaction->t_checkpoint_list) {
 583                 jh->b_cpnext = jh->b_cpprev = jh;
 584         } else {
 585                 jh->b_cpnext = transaction->t_checkpoint_list;
 586                 jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
 587                 jh->b_cpprev->b_cpnext = jh;
 588                 jh->b_cpnext->b_cpprev = jh;
 589         }
 590         transaction->t_checkpoint_list = jh;
 591 }
 592
 593 /*
 594  * We've finished with this transaction structure: adios...
 595  *
 596  * The transaction must have no links except for the checkpoint by this
 597  * point.
 598  *
 599  * Called with the journal locked.
 600  * Called with j_list_lock held.
 601  */
 602
 603 void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 604 {
 605         assert_spin_locked(&journal->j_list_lock);
 606         if (transaction->t_cpnext) {
 607                 transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
 608                 transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
 609                 if (journal->j_checkpoint_transactions == transaction)
 610                         journal->j_checkpoint_transactions =
 611                                 transaction->t_cpnext;
 612                 if (journal->j_checkpoint_transactions == transaction)
 613                         journal->j_checkpoint_transactions = NULL;
 614         }
 615
 616         J_ASSERT(transaction->t_state == T_FINISHED);
 617         J_ASSERT(transaction->t_buffers == NULL);
 618         J_ASSERT(transaction->t_forget == NULL);
 619         J_ASSERT(transaction->t_shadow_list == NULL);
 620         J_ASSERT(transaction->t_checkpoint_list == NULL);
 621         J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 622         J_ASSERT(atomic_read(&transaction->t_updates) == 0);
 623         J_ASSERT(journal->j_committing_transaction != transaction);
 624         J_ASSERT(journal->j_running_transaction != transaction);
 625
 626         trace_jbd2_drop_transaction(journal, transaction);
 627
 628         jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
 629 }