// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <asm/pgtable.h>

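/*
 * Allocate a single-segment bio for the swap slot backing @page.
 * map_swap_page() returns the slot's offset in page-size units; the
 * shift by (PAGE_SHIFT - 9) converts that to 512-byte sectors.
 */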
static struct bio *get_swap_bio(gfp_t gfp_flags,
                                struct page *page, bio_end_io_t end_io)
{
        struct bio *bio;

        bio = bio_alloc(gfp_flags, 1);
        if (bio) {
                struct block_device *bdev;

                bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
                bio_set_dev(bio, bdev);
                bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
                bio->bi_end_io = end_io;

                bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
        }
        return bio;
}

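/*
 * Completion handler for swap-out bios: on error the page is re-dirtied
 * so its contents are not lost; either way writeback is ended and the
 * bio reference dropped.
 */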
void end_swap_bio_write(struct bio *bio)
{
        struct page *page = bio_first_page_all(bio);

        if (bio->bi_status) {
                SetPageError(page);
                /*
                 * We failed to write the page out to swap-space.
                 * Re-dirty the page in order to avoid it being reclaimed.
                 * Also print a dire warning that things will go BAD (tm)
                 * very quickly.
                 *
                 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
                 */
                set_page_dirty(page);
                pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
                         MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
                         (unsigned long long)bio->bi_iter.bi_sector);
                ClearPageReclaim(page);
        }
        end_page_writeback(page);
        bio_put(bio);
}

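/*
 * Completion handler for swap-in bios.  For synchronous reads the
 * submitting task is stashed in bi_private; clearing it tells the
 * polling loop in swap_readpage() that the I/O has finished before the
 * task is woken.
 */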
static void end_swap_bio_read(struct bio *bio)
{
        struct page *page = bio_first_page_all(bio);
        struct task_struct *waiter = bio->bi_private;

        if (bio->bi_status) {
                SetPageError(page);
                ClearPageUptodate(page);
                pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
                         MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
                         (unsigned long long)bio->bi_iter.bi_sector);
                goto out;
        }

        SetPageUptodate(page);
out:
        unlock_page(page);
        WRITE_ONCE(bio->bi_private, NULL);
        bio_put(bio);
        if (waiter) {
                blk_wake_io_task(waiter);
                put_task_struct(waiter);
        }
}

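/*
 * Map the blocks of a swap file into swap extents using bmap().  Each
 * PAGE_SIZE-aligned, physically contiguous run of blocks is added as a
 * page-sized extent (add_swap_extent() merges adjacent ones); pages
 * that are misaligned or discontiguous on disk are skipped.  Returns
 * the number of extents added, or -EINVAL if the file has holes.
 */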
int generic_swapfile_activate(struct swap_info_struct *sis,
                                struct file *swap_file,
                                sector_t *span)
{
        struct address_space *mapping = swap_file->f_mapping;
        struct inode *inode = mapping->host;
        unsigned blocks_per_page;
        unsigned long page_no;
        unsigned blkbits;
        sector_t probe_block;
        sector_t last_block;
        sector_t lowest_block = -1;
        sector_t highest_block = 0;
        int nr_extents = 0;
        int ret;

        blkbits = inode->i_blkbits;
        blocks_per_page = PAGE_SIZE >> blkbits;

        /*
         * Map all the blocks into the extent tree.  This code doesn't try
         * to be very smart.
         */
        probe_block = 0;
        page_no = 0;
        last_block = i_size_read(inode) >> blkbits;
        while ((probe_block + blocks_per_page) <= last_block &&
                        page_no < sis->max) {
                unsigned block_in_page;
                sector_t first_block;

                cond_resched();

                first_block = bmap(inode, probe_block);
                if (first_block == 0)
                        goto bad_bmap;

                /*
                 * It must be PAGE_SIZE aligned on-disk
                 */
                if (first_block & (blocks_per_page - 1)) {
                        probe_block++;
                        goto reprobe;
                }

                for (block_in_page = 1; block_in_page < blocks_per_page;
                                        block_in_page++) {
                        sector_t block;

                        block = bmap(inode, probe_block + block_in_page);
                        if (block == 0)
                                goto bad_bmap;
                        if (block != first_block + block_in_page) {
                                /* Discontiguity */
                                probe_block++;
                                goto reprobe;
                        }
                }

                first_block >>= (PAGE_SHIFT - blkbits);
                if (page_no) {  /* exclude the header page */
                        if (first_block < lowest_block)
                                lowest_block = first_block;
                        if (first_block > highest_block)
                                highest_block = first_block;
                }

                /*
                 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
                 */
                ret = add_swap_extent(sis, page_no, 1, first_block);
                if (ret < 0)
                        goto out;
                nr_extents += ret;
                page_no++;
                probe_block += blocks_per_page;
reprobe:
                continue;
        }
        ret = nr_extents;
        *span = 1 + highest_block - lowest_block;
        if (page_no == 0)
                page_no = 1;    /* force Empty message */
        sis->max = page_no;
        sis->pages = page_no - 1;
        sis->highest_bit = page_no - 1;
out:
        return ret;
bad_bmap:
        pr_err("swapon: swapfile has holes\n");
        ret = -EINVAL;
        goto out;
}

/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
        int ret = 0;

        if (try_to_free_swap(page)) {
                unlock_page(page);
                goto out;
        }
        if (frontswap_store(page) == 0) {
                set_page_writeback(page);
                unlock_page(page);
                end_page_writeback(page);
                goto out;
        }
        ret = __swap_writepage(page, wbc, end_swap_bio_write);
out:
        return ret;
}

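/* Account a swap-out, counting every subpage of a THP individually. */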
static inline void count_swpout_vm_event(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (unlikely(PageTransHuge(page)))
                count_vm_event(THP_SWPOUT);
#endif
        count_vm_events(PSWPOUT, hpage_nr_pages(page));
}

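/*
 * Write one swap-cache page out, trying three paths in turn: SWP_FS
 * swap files go through the filesystem's ->direct_IO, block devices
 * with a ->rw_page method are handled by bdev_write_page(), and
 * everything else falls back to submitting a regular bio.
 */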
int __swap_writepage(struct page *page, struct writeback_control *wbc,
                bio_end_io_t end_write_func)
{
        struct bio *bio;
        int ret;
        struct swap_info_struct *sis = page_swap_info(page);

        VM_BUG_ON_PAGE(!PageSwapCache(page), page);
        if (sis->flags & SWP_FS) {
                struct kiocb kiocb;
                struct file *swap_file = sis->swap_file;
                struct address_space *mapping = swap_file->f_mapping;
                struct bio_vec bv = {
                        .bv_page = page,
                        .bv_len  = PAGE_SIZE,
                        .bv_offset = 0
                };
                struct iov_iter from;

                iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
                init_sync_kiocb(&kiocb, swap_file);
                kiocb.ki_pos = page_file_offset(page);

                set_page_writeback(page);
                unlock_page(page);
                ret = mapping->a_ops->direct_IO(&kiocb, &from);
                if (ret == PAGE_SIZE) {
                        count_vm_event(PSWPOUT);
                        ret = 0;
                } else {
                        /*
                         * In the case of swap-over-nfs, this can be a
                         * temporary failure if the system has limited
                         * memory for allocating transmit buffers.
                         * Mark the page dirty and avoid
                         * rotate_reclaimable_page(); rate-limit the
                         * messages, but do not flag PageError as in the
                         * normal direct-to-bio case, since the failure
                         * may be temporary.
                         */
                        set_page_dirty(page);
                        ClearPageReclaim(page);
                        pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
                                           page_file_offset(page));
                }
                end_page_writeback(page);
                return ret;
        }

        ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
        if (!ret) {
                count_swpout_vm_event(page);
                return 0;
        }

        ret = 0;
        bio = get_swap_bio(GFP_NOIO, page, end_write_func);
        if (bio == NULL) {
                set_page_dirty(page);
                unlock_page(page);
                ret = -ENOMEM;
                goto out;
        }
        bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
        bio_associate_blkg_from_page(bio, page);
        count_swpout_vm_event(page);
        set_page_writeback(page);
        unlock_page(page);
        submit_bio(bio);
out:
        return ret;
}

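/*
 * Read a page back in from swap, trying frontswap first, then the
 * SWP_FS ->readpage path, then bdev_read_page(), and finally a regular
 * bio.  With @synchronous the caller waits here for completion rather
 * than sleeping on the page lock.
 */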
int swap_readpage(struct page *page, bool synchronous)
{
        struct bio *bio;
        int ret = 0;
        struct swap_info_struct *sis = page_swap_info(page);
        blk_qc_t qc;
        struct gendisk *disk;

        VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageUptodate(page), page);
        if (frontswap_load(page) == 0) {
                SetPageUptodate(page);
                unlock_page(page);
                goto out;
        }

        if (sis->flags & SWP_FS) {
                struct file *swap_file = sis->swap_file;
                struct address_space *mapping = swap_file->f_mapping;

                ret = mapping->a_ops->readpage(swap_file, page);
                if (!ret)
                        count_vm_event(PSWPIN);
                return ret;
        }

        ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
        if (!ret) {
                count_vm_event(PSWPIN);
                return 0;
        }

        ret = 0;
        bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
        if (bio == NULL) {
                unlock_page(page);
                ret = -ENOMEM;
                goto out;
        }
        disk = bio->bi_disk;
        /*
         * Keep this task valid during swap readpage because the oom killer may
         * attempt to access it in the page fault retry time check.
         */
        bio_set_op_attrs(bio, REQ_OP_READ, 0);
        if (synchronous) {
                bio->bi_opf |= REQ_HIPRI;
                get_task_struct(current);
                bio->bi_private = current;
        }
        count_vm_event(PSWPIN);
        bio_get(bio);
        qc = submit_bio(bio);
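        /*
         * Hybrid wait: spin in blk_poll() when the queue supports
         * polling, otherwise sleep until end_swap_bio_read() clears
         * bi_private and wakes us.
         */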
        while (synchronous) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!READ_ONCE(bio->bi_private))
                        break;

                if (!blk_poll(disk->queue, qc, true))
                        io_schedule();
        }
        __set_current_state(TASK_RUNNING);
        bio_put(bio);

out:
        return ret;
}

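/*
 * Dirty a swap-cache page.  SWP_FS swap files need the filesystem's own
 * ->set_page_dirty so the page can later be written via ->direct_IO;
 * block-device swap only needs the dirty flag itself, hence
 * __set_page_dirty_no_writeback().
 */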
int swap_set_page_dirty(struct page *page)
{
        struct swap_info_struct *sis = page_swap_info(page);

        if (sis->flags & SWP_FS) {
                struct address_space *mapping = sis->swap_file->f_mapping;

                VM_BUG_ON_PAGE(!PageSwapCache(page), page);
                return mapping->a_ops->set_page_dirty(page);
        } else {
                return __set_page_dirty_no_writeback(page);
        }
}