/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit_unlock(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

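/*
 * Sleep until the metapage is unlocked.  The page lock must be dropped
 * while we wait: the current holder needs it to release the metapage
 * (see release_metapage(), which takes the page lock before calling
 * unlock_metapage()), so sleeping with it held would deadlock.
 */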
static inline void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);
	INCREMENT(mpStat.lockwait);
	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			unlock_page(mp->page);
			io_schedule();
			lock_page(mp->page);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/*
 * Must have mp->page locked
 */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;

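/*
 * A metapage covers PSIZE (4K) bytes.  When PAGE_SIZE is larger,
 * several metapages share one page, and a meta_anchor hung off
 * page_private() tracks them along with the page's in-flight I/O count.
 */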
#define MPS_PER_PAGE (PAGE_SIZE >> L2PSIZE)

#if MPS_PER_PAGE > 1

struct meta_anchor {
	int mp_count;
	atomic_t io_count;
	struct metapage *mp[MPS_PER_PAGE];
};
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))

static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	if (!PagePrivate(page))
		return NULL;
	return mp_anchor(page)->mp[offset >> L2PSIZE];
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a;
	int index;
	int l2mp_blocks;	/* log2 blocks per metapage */

	if (PagePrivate(page))
		a = mp_anchor(page);
	else {
		a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
		if (!a)
			return -ENOMEM;
		set_page_private(page, (unsigned long)a);
		SetPagePrivate(page);
		kmap(page);
	}

	if (mp) {
		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
		a->mp_count++;
		a->mp[index] = mp;
	}

	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a = mp_anchor(page);
	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
	int index;

	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);

	BUG_ON(a->mp[index] != mp);

	a->mp[index] = NULL;
	if (--a->mp_count == 0) {
		kfree(a);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		kunmap(page);
	}
}

static inline void inc_io(struct page *page)
{
	atomic_inc(&mp_anchor(page)->io_count);
}

static inline void dec_io(struct page *page, void (*handler) (struct page *))
{
	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
		handler(page);
}

#else

static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	if (mp) {
		set_page_private(page, (unsigned long)mp);
		SetPagePrivate(page);
		kmap(page);
	}
	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	set_page_private(page, 0);
	ClearPagePrivate(page);
	kunmap(page);
}

#define inc_io(page) do {} while(0)
#define dec_io(page, handler) handler(page)

#endif

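/*
 * Metapages come from a mempool, which keeps a reserve of
 * METAPOOL_MIN_PAGES objects available even under memory pressure so
 * that metadata writeback can always make forward progress.
 */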
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
	struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);

	if (mp) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		init_waitqueue_head(&mp->wait);
	}
	return mp;
}

static inline void free_metapage(struct metapage *mp)
{
	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
						    metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

static inline void drop_metapage(struct page *page, struct metapage *mp)
{
	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
	    test_bit(META_io, &mp->flag))
		return;
	remove_metapage(page, mp);
	INCREMENT(mpStat.pagefree);
	free_metapage(mp);
}

/*
 * Metapage address space operations
 */

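/*
 * Map a file-relative block to an on-disk block.  On success the
 * starting physical block is returned and *len is trimmed to the
 * contiguous extent; 0 means a hole or a failed lookup.
 */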
static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
				    int *len)
{
	int rc = 0;
	int xflag;
	s64 xaddr;
	sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
			       inode->i_blkbits;

	if (lblock >= file_blocks)
		return 0;
	if (lblock + *len > file_blocks)
		*len = file_blocks - lblock;

	if (inode->i_ino) {
		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
		if ((rc == 0) && *len)
			lblock = (sector_t)xaddr;
		else
			lblock = 0;
	} /* else no mapping */

	return lblock;
}

static void last_read_complete(struct page *page)
{
	if (!PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
}

static void metapage_read_end_io(struct bio *bio)
{
	struct page *page = bio->bi_private;

	if (bio->bi_status) {
		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
		SetPageError(page);
	}

	dec_io(page, last_read_complete);
	bio_put(bio);
}

static void remove_from_logsync(struct metapage *mp)
{
	struct jfs_log *log = mp->log;
	unsigned long flags;
	/*
	 * This can race.  Recheck that log hasn't been set to null, and after
	 * acquiring logsync lock, recheck lsn
	 */
	if (!log)
		return;

	LOGSYNC_LOCK(log, flags);
	if (mp->lsn) {
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
	}
	LOGSYNC_UNLOCK(log, flags);
}

static void last_write_complete(struct page *page)
{
	struct metapage *mp;
	unsigned int offset;

	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);
		if (mp && test_bit(META_io, &mp->flag)) {
			if (mp->lsn)
				remove_from_logsync(mp);
			clear_bit(META_io, &mp->flag);
		}
		/*
		 * I'd like to call drop_metapage here, but I don't think it's
		 * safe unless I have the page locked
		 */
	}
	end_page_writeback(page);
}

static void metapage_write_end_io(struct bio *bio)
{
	struct page *page = bio->bi_private;

	BUG_ON(!PagePrivate(page));

	if (bio->bi_status) {
		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
		SetPageError(page);
	}
	dec_io(page, last_write_complete);
	bio_put(bio);
}

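/*
 * Write back every dirty metapage in the page.  Metapages that are
 * contiguous both in memory and on disk are coalesced into a single
 * bio; a discontiguity submits the current bio and starts a new one.
 * Metapages pinned by the journal (nohomeok) are redirtied instead.
 */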
static int metapage_writepage(struct page *page, struct writeback_control *wbc)
{
	struct bio *bio = NULL;
	int block_offset;	/* block offset of mp within page */
	struct inode *inode = page->mapping->host;
	int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
	int len;
	int xlen;
	struct metapage *mp;
	int redirty = 0;
	sector_t lblock;
	int nr_underway = 0;
	sector_t pblock;
	sector_t next_block = 0;
	sector_t page_start;
	unsigned long bio_bytes = 0;
	unsigned long bio_offset = 0;
	int offset;
	int bad_blocks = 0;

	page_start = (sector_t)page->index <<
		     (PAGE_SHIFT - inode->i_blkbits);
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp || !test_bit(META_dirty, &mp->flag))
			continue;

		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
			redirty = 1;
			/*
			 * Make sure this page isn't blocked indefinitely.
			 * If the journal isn't undergoing I/O, push it
			 */
			if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
				jfs_flush_journal(mp->log, 0);
			continue;
		}

		clear_bit(META_dirty, &mp->flag);
		set_bit(META_io, &mp->flag);
		block_offset = offset >> inode->i_blkbits;
		lblock = page_start + block_offset;
		if (bio) {
			if (xlen && lblock == next_block) {
				/* Contiguous, in memory & on disk */
				len = min(xlen, blocks_per_mp);
				xlen -= len;
				bio_bytes += len << inode->i_blkbits;
				continue;
			}
			/* Not contiguous */
			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
			    bio_bytes)
				goto add_failed;
			/*
			 * Increment counter before submitting i/o to keep
			 * count from hitting zero before we're through
			 */
			inc_io(page);
			if (!bio->bi_iter.bi_size)
				goto dump_bio;
			submit_bio(bio);
			nr_underway++;
			bio = NULL;
		} else
			inc_io(page);
		xlen = (PAGE_SIZE - offset) >> inode->i_blkbits;
		pblock = metapage_get_blocks(inode, lblock, &xlen);
		if (!pblock) {
			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
			/*
			 * We already called inc_io(), but can't cancel it
			 * with dec_io() until we're done with the page
			 */
			bad_blocks++;
			continue;
		}
		len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);

		bio = bio_alloc(GFP_NOFS, 1);
		bio_set_dev(bio, inode->i_sb->s_bdev);
		bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
		bio->bi_end_io = metapage_write_end_io;
		bio->bi_private = page;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		/* Don't call bio_add_page yet, we may add to this vec */
		bio_offset = offset;
		bio_bytes = len << inode->i_blkbits;

		xlen -= len;
		next_block = lblock + len;
	}
	if (bio) {
		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
			goto add_failed;
		if (!bio->bi_iter.bi_size)
			goto dump_bio;

		submit_bio(bio);
		nr_underway++;
	}
	if (redirty)
		redirty_page_for_writepage(wbc, page);

	unlock_page(page);

	if (bad_blocks)
		goto err_out;

	if (nr_underway == 0)
		end_page_writeback(page);

	return 0;
add_failed:
	/* We should never reach here, since we're only adding one vec */
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	goto skip;
dump_bio:
	print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
		       4, bio, sizeof(*bio), 0);
skip:
	bio_put(bio);
	unlock_page(page);
	dec_io(page, last_write_complete);
err_out:
	while (bad_blocks--)
		dec_io(page, last_write_complete);
	return -EIO;
}

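/*
 * Read the page as one or more bios, one per contiguous extent
 * returned by metapage_get_blocks(); unmapped blocks are skipped.
 */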
static int metapage_readpage(struct file *fp, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct bio *bio = NULL;
	int block_offset;
	int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
	sector_t page_start;	/* address of page in fs blocks */
	sector_t pblock;
	int xlen;
	unsigned int len;
	int offset;

	BUG_ON(!PageLocked(page));
	page_start = (sector_t)page->index <<
		     (PAGE_SHIFT - inode->i_blkbits);

	block_offset = 0;
	while (block_offset < blocks_per_page) {
		xlen = blocks_per_page - block_offset;
		pblock = metapage_get_blocks(inode, page_start + block_offset,
					     &xlen);
		if (pblock) {
			if (!PagePrivate(page))
				insert_metapage(page, NULL);
			inc_io(page);
			if (bio)
				submit_bio(bio);

			bio = bio_alloc(GFP_NOFS, 1);
			bio_set_dev(bio, inode->i_sb->s_bdev);
			bio->bi_iter.bi_sector =
				pblock << (inode->i_blkbits - 9);
			bio->bi_end_io = metapage_read_end_io;
			bio->bi_private = page;
			bio_set_op_attrs(bio, REQ_OP_READ, 0);
			len = xlen << inode->i_blkbits;
			offset = block_offset << inode->i_blkbits;
			if (bio_add_page(bio, page, len, offset) < len)
				goto add_failed;
			block_offset += xlen;
		} else
			block_offset++;
	}
	if (bio)
		submit_bio(bio);
	else
		unlock_page(page);

	return 0;

add_failed:
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	bio_put(bio);
	dec_io(page, last_read_complete);
	return -EIO;
}

static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct metapage *mp;
	int ret = 1;
	int offset;

	for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp)
			continue;

		jfs_info("metapage_releasepage: mp = 0x%p", mp);
		if (mp->count || mp->nohomeok ||
		    test_bit(META_dirty, &mp->flag)) {
			jfs_info("count = %ld, nohomeok = %d", mp->count,
				 mp->nohomeok);
			ret = 0;
			continue;
		}
		if (mp->lsn)
			remove_from_logsync(mp);
		remove_metapage(page, mp);
		INCREMENT(mpStat.pagefree);
		free_metapage(mp);
	}
	return ret;
}

static void metapage_invalidatepage(struct page *page, unsigned int offset,
				    unsigned int length)
{
	BUG_ON(offset || length < PAGE_SIZE);

	BUG_ON(PageWriteback(page));

	metapage_releasepage(page, 0);
}

const struct address_space_operations jfs_metapage_aops = {
	.readpage	= metapage_readpage,
	.writepage	= metapage_writepage,
	.releasepage	= metapage_releasepage,
	.invalidatepage	= metapage_invalidatepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

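/*
 * Look up (or, if 'new' is set, create) the metapage covering 'lblock'.
 * On success the metapage is returned locked with its reference count
 * raised; callers pair this with release_metapage().
 */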
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp = NULL;
	struct page *page;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
		 inode->i_ino, lblock, absolute);

	l2bsize = inode->i_blkbits;
	l2BlocksPerPage = PAGE_SHIFT - l2bsize;
	page_index = lblock >> l2BlocksPerPage;
	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
	if ((page_offset + size) > PAGE_SIZE) {
		jfs_err("MetaData crosses page boundary!!");
		jfs_err("lblock = %lx, size = %d", lblock, size);
		dump_stack();
		return NULL;
	}
	if (absolute)
		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	if (new && (PSIZE == PAGE_SIZE)) {
		page = grab_cache_page(mapping, page_index);
		if (!page) {
			jfs_err("grab_cache_page failed!");
			return NULL;
		}
		SetPageUptodate(page);
	} else {
		page = read_mapping_page(mapping, page_index, NULL);
		if (IS_ERR(page) || !PageUptodate(page)) {
			jfs_err("read_mapping_page failed!");
			return NULL;
		}
		lock_page(page);
	}

	mp = page_to_mp(page, page_offset);
	if (mp) {
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "get_mp->logical_size != size\n");
			jfs_err("logical_size = %d, size = %d",
				mp->logical_size, size);
			dump_stack();
			goto unlock;
		}
		mp->count++;
		lock_metapage(mp);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "using a discarded metapage\n");
				discard_metapage(mp);
				goto unlock;
			}
			clear_bit(META_discard, &mp->flag);
		}
	} else {
		INCREMENT(mpStat.pagealloc);
		mp = alloc_metapage(GFP_NOFS);
		if (!mp)
			goto unlock;
		mp->page = page;
		mp->sb = inode->i_sb;
		mp->flag = 0;
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		mp->nohomeok = 0;
		mp->logical_size = size;
		mp->data = page_address(page) + page_offset;
		mp->index = lblock;
		if (unlikely(insert_metapage(page, mp))) {
			free_metapage(mp);
			goto unlock;
		}
		lock_metapage(mp);
	}

	if (new) {
		jfs_info("zeroing mp = 0x%p", mp);
		memset(mp->data, 0, PSIZE);
	}

	unlock_page(page);
	jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
	return mp;

unlock:
	unlock_page(page);
	return NULL;
}

void grab_metapage(struct metapage * mp)
{
	jfs_info("grab_metapage: mp = 0x%p", mp);
	get_page(mp->page);
	lock_page(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
}

void force_metapage(struct metapage *mp)
{
	struct page *page = mp->page;
	jfs_info("force_metapage: mp = 0x%p", mp);
	set_bit(META_forcewrite, &mp->flag);
	clear_bit(META_sync, &mp->flag);
	get_page(page);
	lock_page(page);
	set_page_dirty(page);
	if (write_one_page(page))
		jfs_error(mp->sb, "write_one_page() failed\n");
	clear_bit(META_forcewrite, &mp->flag);
	put_page(page);
}

void hold_metapage(struct metapage *mp)
{
	lock_page(mp->page);
}

void put_metapage(struct metapage *mp)
{
	if (mp->count || mp->nohomeok) {
		/* Someone else will release this */
		unlock_page(mp->page);
		return;
	}
	get_page(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
	release_metapage(mp);
}

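/*
 * Drop the reference taken by __get_metapage()/grab_metapage().  The
 * last reference writes back a dirty META_sync page and then tries to
 * free the metapage itself via drop_metapage().
 */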
void release_metapage(struct metapage * mp)
{
	struct page *page = mp->page;
	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	BUG_ON(!page);

	lock_page(page);
	unlock_metapage(mp);

	assert(mp->count);
	if (--mp->count || mp->nohomeok) {
		unlock_page(page);
		put_page(page);
		return;
	}

	if (test_bit(META_dirty, &mp->flag)) {
		set_page_dirty(page);
		if (test_bit(META_sync, &mp->flag)) {
			clear_bit(META_sync, &mp->flag);
			if (write_one_page(page))
				jfs_error(mp->sb, "write_one_page() failed\n");
			lock_page(page); /* write_one_page unlocks the page */
		}
	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
		remove_from_logsync(mp);

	/* Try to keep metapages from using up too much memory */
	drop_metapage(page, mp);

	unlock_page(page);
	put_page(page);
}

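/*
 * Called when on-disk blocks are freed: any cached metapages covering
 * them are flagged META_discard so stale metadata is never written
 * over blocks that may already have been reused.
 */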
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	sector_t lblock;
	int l2BlocksPerPage = PAGE_SHIFT - ip->i_blkbits;
	int BlocksPerPage = 1 << l2BlocksPerPage;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping =
		JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
	struct metapage *mp;
	struct page *page;
	unsigned int offset;

	/*
	 * Mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
	     lblock += BlocksPerPage) {
		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
		if (!page)
			continue;
		for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
			mp = page_to_mp(page, offset);
			if (!mp)
				continue;
			if (mp->index < addr)
				continue;
			if (mp->index >= addr + len)
				break;

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			if (mp->lsn)
				remove_from_logsync(mp);
		}
		unlock_page(page);
		put_page(page);
	}
}

#ifdef CONFIG_JFS_STATISTICS
static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		   "JFS Metapage statistics\n"
		   "=======================\n"
		   "page allocations = %d\n"
		   "page frees = %d\n"
		   "lock waits = %d\n",
		   mpStat.pagealloc,
		   mpStat.pagefree,
		   mpStat.lockwait);
	return 0;
}

static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_mpstat_proc_show, NULL);
}

const struct file_operations jfs_mpstat_proc_fops = {
	.open		= jfs_mpstat_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif