/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define on_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
/*
 * Check whether the given nid is within node id range.
 */
int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
	if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: out-of-range nid=%x, run fsck to fix.",
				__func__, nid);
		return -EINVAL;
	}
	return 0;
}
bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct sysinfo val;
	unsigned long avail_ram;
	unsigned long mem_size = 0;
	bool res = false;

	si_meminfo(&val);

	/* only uses low memory */
	avail_ram = val.totalram - val.totalhigh;

	/*
	 * give 25%, 25%, 50%, 50%, 50% memory for each component respectively
	 */
	if (type == FREE_NIDS) {
		mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
				sizeof(struct free_nid)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
	} else if (type == NAT_ENTRIES) {
		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
							PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
		if (excess_cached_nats(sbi))
			res = false;
	} else if (type == DIRTY_DENTS) {
		if (sbi->sb->s_bdi->wb.dirty_exceeded)
			return false;
		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INO_ENTRIES) {
		int i;

		for (i = 0; i <= UPDATE_INO; i++)
			mem_size += sbi->im[i].ino_num *
						sizeof(struct ino_entry);
		mem_size >>= PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == EXTENT_CACHE) {
		mem_size = (atomic_read(&sbi->total_ext_tree) *
				sizeof(struct extent_tree) +
				atomic_read(&sbi->total_ext_node) *
				sizeof(struct extent_node)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else {
		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
			return true;
	}
	return res;
}
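/*
 * Worked example for the thresholds above (illustrative numbers, not
 * taken from the source): with avail_ram = 262144 pages (1GB of low
 * memory) and the default ram_thresh of 10, the FREE_NIDS and
 * NAT_ENTRIES caches may each grow to (262144 * 10 / 100) >> 2 = 6553
 * pages, while DIRTY_DENTS, INO_ENTRIES and EXTENT_CACHE may each grow
 * to (262144 * 10 / 100) >> 1 = 13107 pages - the 25%/50% split noted
 * in the comment above.
 */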
static void clear_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	unsigned long flags;

	if (PageDirty(page)) {
		spin_lock_irqsave(&mapping->tree_lock, flags);
		radix_tree_tag_clear(&mapping->page_tree,
				page_index(page),
				PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);

		clear_page_dirty_for_io(page);
		dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	pgoff_t index = current_nat_addr(sbi, nid);

	return get_meta_page(sbi, index);
}

static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t src_off;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	src_off = current_nat_addr(sbi, nid);
	dst_off = next_nat_addr(sbi, src_off);

	/* get current nat block page with lock */
	src_page = get_meta_page(sbi, src_off);
	dst_page = grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	return radix_tree_lookup(&nm_i->nat_root, n);
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	list_del(&e->list);
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	kmem_cache_free(nat_entry_slab, e);
}
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
						struct nat_entry *ne)
{
	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
	struct nat_entry_set *head;

	head = radix_tree_lookup(&nm_i->nat_set_root, set);
	if (!head) {
		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);

		INIT_LIST_HEAD(&head->entry_list);
		INIT_LIST_HEAD(&head->set_list);
		head->set = set;
		head->entry_cnt = 0;
		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
	}

	if (get_nat_flag(ne, IS_DIRTY))
		goto refresh_list;

	nm_i->dirty_nat_cnt++;
	head->entry_cnt++;
	set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
	if (nat_get_blkaddr(ne) == NEW_ADDR)
		list_del_init(&ne->list);
	else
		list_move_tail(&ne->list, &head->entry_list);
}

static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
		struct nat_entry_set *set, struct nat_entry *ne)
{
	list_move_tail(&ne->list, &nm_i->nat_entries);
	set_nat_flag(ne, IS_DIRTY, false);
	set->entry_cnt--;
	nm_i->dirty_nat_cnt--;
}
static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry_set **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
							start, nr);
}
int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need = false;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
				!get_nat_flag(e, HAS_FSYNCED_INODE))
			need = true;
	}
	up_read(&nm_i->nat_tree_lock);
	return need;
}

bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool is_cp = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
		is_cp = false;
	up_read(&nm_i->nat_tree_lock);
	return is_cp;
}

bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need_update = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ino);
	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
			(get_nat_flag(e, IS_CHECKPOINTED) ||
			 get_nat_flag(e, HAS_FSYNCED_INODE)))
		need_update = false;
	up_read(&nm_i->nat_tree_lock);
	return need_update;
}
static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
								bool no_fail)
{
	struct nat_entry *new;

	if (no_fail) {
		new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
		f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
	} else {
		new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
		if (!new)
			return NULL;
		if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
			kmem_cache_free(nat_entry_slab, new);
			return NULL;
		}
	}

	memset(new, 0, sizeof(struct nat_entry));
	nat_set_nid(new, nid);
	nat_reset_flag(new);
	list_add_tail(&new->list, &nm_i->nat_entries);
	nm_i->nat_cnt++;
	return new;
}

static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;

	e = __lookup_nat_cache(nm_i, nid);
	if (!e) {
		e = grab_nat_entry(nm_i, nid, false);
		if (e)
			node_info_from_raw_nat(&e->ni, ne);
	} else {
		f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
				nat_get_blkaddr(e) !=
					le32_to_cpu(ne->block_addr) ||
				nat_get_version(e) != ne->version);
	}
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr, bool fsync_done)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = grab_nat_entry(nm_i, ni->nid, true);
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * when a nid is reallocated, its previous nat entry can
		 * remain in the nat cache, so reinitialize it with the
		 * new information.
		 */
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
	}

	/* sanity check */
	f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);

		nat_set_version(e, inc_node_version(version));

		/* in order to reuse the nid */
		if (nm_i->next_scan_nid > ni->nid)
			nm_i->next_scan_nid = ni->nid;
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	if (!is_valid_data_blkaddr(sbi, new_blkaddr))
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

	/* update fsync_mark if its inode nat entry is still alive */
	if (ni->nid != ni->ino)
		e = __lookup_nat_cache(nm_i, ni->ino);
	if (e) {
		if (fsync_done && ni->nid == ni->ino)
			set_nat_flag(e, HAS_FSYNCED_INODE, true);
		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
	}
	up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int nr = nr_shrink;

	if (!down_write_trylock(&nm_i->nat_tree_lock))
		return 0;

	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
		struct nat_entry *ne;
		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;
	}
	up_write(&nm_i->nat_tree_lock);
	return nr - nr_shrink;
}
/*
 * This function always returns success
 */
void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	pgoff_t index;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
		up_read(&nm_i->nat_tree_lock);
		return;
	}

	memset(&ne, 0, sizeof(struct f2fs_nat_entry));

	/* Check current segment summary */
	down_read(&curseg->journal_rwsem);
	i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(journal, i);
		node_info_from_raw_nat(ni, &ne);
	}
	up_read(&curseg->journal_rwsem);
	if (i >= 0) {
		up_read(&nm_i->nat_tree_lock);
		goto cache;
	}

	/* Fill node_info from nat page */
	index = current_nat_addr(sbi, nid);
	up_read(&nm_i->nat_tree_lock);

	page = get_meta_page(sbi, index);
	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	down_write(&nm_i->nat_tree_lock);
	cache_nat_entry(sbi, nid, &ne);
	up_write(&nm_i->nat_tree_lock);
}
/*
 * readahead MAX_RA_NODE number of node pages.
 */
static void ra_node_pages(struct page *parent, int start, int n)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	struct blk_plug plug;
	int i, end;
	nid_t nid;

	blk_start_plug(&plug);

	/* Then, try readahead for siblings of the desired node */
	end = start + n;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start; i < end; i++) {
		nid = get_nid(parent, i, false);
		ra_node_page(sbi, nid);
	}

	blk_finish_plug(&plug);
}
pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
	const long direct_index = ADDRS_PER_INODE(dn->inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	unsigned int skipped_unit = ADDRS_PER_BLOCK;
	int cur_level = dn->cur_level;
	int max_level = dn->max_level;
	pgoff_t base = 0;

	if (!dn->max_level)
		return pgofs + 1;

	while (max_level-- > cur_level)
		skipped_unit *= NIDS_PER_BLOCK;

	switch (dn->max_level) {
	case 3:
		base += 2 * indirect_blks;
	case 2:
		base += 2 * direct_blks;
	case 1:
		base += direct_index;
		break;
	default:
		f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
	}

	return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
}
/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
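/*
 * Illustrative example (assuming the default 4KB-block geometry of 923
 * block addresses in the inode, 1018 addresses per direct node and
 * 1018 node ids per indirect node): file blocks 0..922 are addressed
 * by the inode itself (depth 0), the next 2 * 1018 blocks by the two
 * direct nodes (depth 1), then the two indirect nodes and finally the
 * double indirect node cover the rest (depths 2 and 3).
 */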
static int get_node_path(struct inode *inode, long block,
				int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE(inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n] = block;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
			      offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
			      offset[n - 2] * (dptrs_per_blk + 1) +
			      offset[n - 1];
		offset[n] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		return -E2BIG;
	}
got:
	return level;
}
/*
 * Caller should call f2fs_put_dnode(dn).
 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op() only if mode is set to ALLOC_NODE.
 * Read-only lookups (LOOKUP_NODE*) don't need to care about the lock.
 */
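/*
 * A minimal caller sketch (see remove_inode_page() below for a real
 * user of this pattern):
 *
 *	struct dnode_of_data dn;
 *	int err;
 *
 *	set_new_dnode(&dn, inode, NULL, NULL, 0);
 *	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
 *	if (err)
 *		return err;
 *	(use dn.node_page / dn.data_blkaddr here)
 *	f2fs_put_dnode(&dn);
 */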
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct page *npage[4];
	struct page *parent = NULL;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i = 0;
	int err = 0;

	level = get_node_path(dn->inode, index, offset, noffset);
	if (level < 0)
		return level;

	nids[0] = dn->inode->i_ino;
	npage[0] = dn->inode_page;

	if (!npage[0]) {
		npage[0] = get_node_page(sbi, nids[0]);
		if (IS_ERR(npage[0]))
			return PTR_ERR(npage[0]);
	}

	/* if inline_data is set, should not report any block indices */
	if (f2fs_has_inline_data(dn->inode) && index) {
		err = -ENOENT;
		f2fs_put_page(npage[0], 1);
		goto release_out;
	}

	parent = npage[0];
	if (level != 0)
		nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && mode == ALLOC_NODE) {
			/* alloc new node */
			if (!alloc_nid(sbi, &(nids[i]))) {
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				alloc_nid_failed(sbi, nids[i]);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			alloc_nid_done(sbi, nids[i]);
			done = true;
		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
			npage[i] = get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	if (err == -ENOENT) {
		dn->cur_level = i;
		dn->max_level = level;
		dn->ofs_in_node = offset[level];
	}
	return err;
}
static void truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info ni;
	pgoff_t index;

	get_node_info(sbi, dn->nid, &ni);
	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);

	/* Deallocate node address */
	invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

	if (dn->nid == dn->inode->i_ino) {
		remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
		f2fs_inode_synced(dn->inode);
	}

	clear_node_page_dirty(dn->node_page);
	set_sbi_flag(sbi, SBI_IS_DIRTY);

	index = dn->node_page->index;
	f2fs_put_page(dn->node_page, 1);

	invalidate_mapping_pages(NODE_MAPPING(sbi),
			index, index);

	dn->node_page = NULL;
	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
}
static int truncate_dnode(struct dnode_of_data *dn)
{
	struct page *page;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	truncate_data_blocks(dn);
	truncate_node(dn);
	return 1;
}
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);

	page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	ra_node_pages(page, ofs, NIDS_PER_BLOCK);

	rn = F2FS_NODE(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			if (set_nid(page, i, 0, false))
				dn->node_changed = true;
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				if (set_nid(page, i, 0, false))
					dn->node_changed = true;
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
	return freed;

out_err:
	f2fs_put_page(page, 1);
	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
	return ret;
}
static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < idx + 1; i++) {
		/* reference count will be increased */
		pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
		if (IS_ERR(pages[i])) {
			err = PTR_ERR(pages[i]);
			idx = i - 1;
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		if (set_nid(pages[idx], i, 0, false))
			dn->node_changed = true;
	}

	if (offset[idx + 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[idx + 1] = 0;
	idx--;
fail:
	for (i = idx; i >= 0; i--)
		f2fs_put_page(pages[i], 1);

	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);

	return err;
}
/*
 * All the block addresses of data and nodes should be nullified.
 */
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;

	trace_f2fs_truncate_inode_blocks_enter(inode, from);

	level = get_node_path(inode, from, offset, noffset);
	if (level < 0)
		return level;

	page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			BUG_ON(page->mapping != NODE_MAPPING(sbi));
			f2fs_wait_on_page_writeback(page, NODE, true);
			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	trace_f2fs_truncate_inode_blocks_exit(inode, err);
	return err > 0 ? 0 : err;
}
int truncate_xattr_node(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t nid = F2FS_I(inode)->i_xattr_nid;
	struct dnode_of_data dn;
	struct page *npage;

	if (!nid)
		return 0;

	npage = get_node_page(sbi, nid);
	if (IS_ERR(npage))
		return PTR_ERR(npage);

	f2fs_i_xnid_write(inode, 0);

	set_new_dnode(&dn, inode, page, npage, nid);

	if (page)
		dn.inode_page_locked = true;
	truncate_node(&dn);
	return 0;
}
/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int remove_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	err = truncate_xattr_node(inode, dn.inode_page);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	/* remove potential inline_data blocks */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode))
		truncate_data_blocks_range(&dn, 1);

	/* 0 is possible, after f2fs_new_inode() has failed */
	f2fs_bug_on(F2FS_I_SB(inode),
			inode->i_blocks != 0 && inode->i_blocks != 8);

	/* will put inode & node pages */
	truncate_node(&dn);
	return 0;
}
struct page *new_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);

	/* caller should f2fs_put_page(page, 1); */
	return new_node_page(&dn, 0);
}
struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info new_ni;
	struct page *page;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
		goto fail;

#ifdef CONFIG_F2FS_CHECK_FS
	get_node_info(sbi, dn->nid, &new_ni);
	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
	new_ni.nid = dn->nid;
	new_ni.ino = dn->inode->i_ino;
	new_ni.blk_addr = NULL_ADDR;
	new_ni.flag = 0;
	new_ni.version = 0;
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);

	f2fs_wait_on_page_writeback(page, NODE, true);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
	set_cold_node(dn->inode, page);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (set_page_dirty(page))
		dn->node_changed = true;

	if (f2fs_has_xattr_block(ofs))
		f2fs_i_xnid_write(dn->inode, dn->nid);

	if (ofs == 0)
		inc_valid_inode_count(sbi);
	return page;

fail:
	clear_node_page_dirty(page);
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}
/*
 * Caller should do after getting the following values.
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 */
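/*
 * i.e. a caller does roughly (cf. __get_node_page() below):
 *
 *	err = read_node_page(page, 0);
 *	if (err < 0)
 *		f2fs_put_page(page, 1);	(page is still locked on error)
 *	else if (err == LOCKED_PAGE)
 *		(page was already uptodate and keeps its lock)
 *	else
 *		lock_page(page);	(wait for the submitted read bio)
 */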
static int read_node_page(struct page *page, int op_flags)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_READ,
		.op_flags = op_flags,
		.page = page,
		.encrypted_page = NULL,
	};

	if (PageUptodate(page))
		return LOCKED_PAGE;

	get_node_info(sbi, page->index, &ni);

	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		return -ENOENT;
	}

	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
	return f2fs_submit_page_bio(&fio);
}
/*
 * Readahead a node page
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage;
	int err;

	if (!nid)
		return;
	if (check_nid_range(sbi, nid))
		return;

	rcu_read_lock();
	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
	rcu_read_unlock();
	if (apage)
		return;

	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!apage)
		return;

	err = read_node_page(apage, REQ_RAHEAD);
	f2fs_put_page(apage, err ? 1 : 0);
}
static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
					struct page *parent, int start)
{
	struct page *page;
	int err;

	if (!nid)
		return ERR_PTR(-ENOENT);
	if (check_nid_range(sbi, nid))
		return ERR_PTR(-EINVAL);
repeat:
	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, 0);
	if (err < 0) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	} else if (err == LOCKED_PAGE) {
		err = 0;
		goto page_hit;
	}

	if (parent)
		ra_node_pages(parent, start + 1, MAX_RA_NODE);

	lock_page(page);

	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page))) {
		err = -EIO;
		goto out_err;
	}

	if (!f2fs_inode_chksum_verify(sbi, page)) {
		err = -EBADMSG;
		goto out_err;
	}
page_hit:
	if (unlikely(nid != nid_of_node(page))) {
		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
			nid, nid_of_node(page), ino_of_node(page),
			ofs_of_node(page), cpver_of_node(page),
			next_blkaddr_of_node(page));
		err = -EINVAL;
out_err:
		ClearPageUptodate(page);
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	return page;
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	return __get_node_page(sbi, nid, NULL, 0);
}

struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	nid_t nid = get_nid(parent, start, false);

	return __get_node_page(sbi, nid, parent, start);
}
static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct page *page;
	int ret;

	/* should flush inline_data before evict_inode */
	inode = ilookup(sbi->sb, ino);
	if (!inode)
		return;

	page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
	if (!page)
		goto iput_out;

	if (!PageUptodate(page))
		goto page_out;

	if (!PageDirty(page))
		goto page_out;

	if (!clear_page_dirty_for_io(page))
		goto page_out;

	ret = f2fs_write_inline_data(inode, page);
	inode_dec_dirty_pages(inode);
	remove_dirty_inode(inode);
	if (ret)
		set_page_dirty(page);
page_out:
	f2fs_put_page(page, 1);
iput_out:
	iput(inode);
}
void move_node_page(struct page *node_page, int gc_type)
{
	if (gc_type == FG_GC) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 1,
			.for_reclaim = 0,
		};

		set_page_dirty(node_page);
		f2fs_wait_on_page_writeback(node_page, NODE, true);

		f2fs_bug_on(sbi, PageWriteback(node_page));
		if (!clear_page_dirty_for_io(node_page))
			goto out_page;

		if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
			unlock_page(node_page);
		goto release_page;
	} else {
		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
	}
out_page:
	unlock_page(node_page);
release_page:
	f2fs_put_page(node_page, 0);
}
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index, end;
	struct pagevec pvec;
	struct page *last_page = NULL;

	pagevec_init(&pvec, 0);
	index = 0;
	end = ULONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return ERR_PTR(-EIO);
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (last_page)
				f2fs_put_page(last_page, 0);

			get_page(page);
			last_page = page;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return last_page;
}
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
				struct writeback_control *wbc, bool do_balance,
				enum iostat_type io_type)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	nid_t nid;
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.io_type = io_type,
	};

	trace_f2fs_writepage(page, NODE);

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	f2fs_bug_on(sbi, page->index != nid);

	if (wbc->for_reclaim) {
		if (!down_read_trylock(&sbi->node_write))
			goto redirty_out;
	} else {
		down_read(&sbi->node_write);
	}

	get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		up_read(&sbi->node_write);
		unlock_page(page);
		return 0;
	}

	if (__is_valid_data_blkaddr(ni.blk_addr) &&
		!f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) {
		up_read(&sbi->node_write);
		goto redirty_out;
	}

	if (atomic && !test_opt(sbi, NOBARRIER))
		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;

	set_page_writeback(page);
	fio.old_blkaddr = ni.blk_addr;
	write_node_page(nid, &fio);
	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
	dec_page_count(sbi, F2FS_DIRTY_NODES);
	up_read(&sbi->node_write);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
						page->index, NODE);
		submitted = NULL;
	}

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, NODE);
		submitted = NULL;
	}
	if (submitted)
		*submitted = fio.submitted;

	if (do_balance)
		f2fs_balance_fs(sbi, false);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}
static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
}
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
			struct writeback_control *wbc, bool atomic)
{
	pgoff_t index, end;
	pgoff_t last_idx = ULONG_MAX;
	struct pagevec pvec;
	int ret = 0;
	struct page *last_page = NULL;
	bool marked = false;
	nid_t ino = inode->i_ino;

	if (atomic) {
		last_page = last_fsync_dnode(sbi, ino);
		if (IS_ERR_OR_NULL(last_page))
			return PTR_ERR_OR_ZERO(last_page);
	}
retry:
	pagevec_init(&pvec, 0);
	index = 0;
	end = ULONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page) && page != last_page) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);
			BUG_ON(PageWriteback(page));

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			if (!atomic || page == last_page) {
				set_fsync_mark(page, 1);
				if (IS_INODE(page)) {
					if (is_inode_flag_set(inode,
								FI_DIRTY_INODE))
						update_inode(inode, page);
					set_dentry_mark(page,
						need_dentry_mark(sbi, ino));
				}
				/* may be written by other thread */
				if (!PageDirty(page))
					set_page_dirty(page);
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_node_page(page, atomic &&
						page == last_page,
						&submitted, wbc, true,
						FS_NODE_IO);
			if (ret) {
				unlock_page(page);
				f2fs_put_page(last_page, 0);
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (page == last_page) {
				f2fs_put_page(page, 0);
				marked = true;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();

		if (ret || marked)
			break;
	}
	if (!ret && atomic && !marked) {
		f2fs_msg(sbi->sb, KERN_DEBUG,
			"Retry to write fsync mark: ino=%u, idx=%lx",
					ino, last_page->index);
		lock_page(last_page);
		f2fs_wait_on_page_writeback(last_page, NODE, true);
		set_page_dirty(last_page);
		unlock_page(last_page);
		goto retry;
	}
out:
	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
	return ret ? -EIO : 0;
}
int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
				bool do_balance, enum iostat_type io_type)
{
	pgoff_t index, end;
	struct pagevec pvec;
	int step = 0;
	int nwritten = 0;
	int ret = 0;

	pagevec_init(&pvec, 0);

next_step:
	index = 0;
	end = ULONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;
lock_node:
			if (wbc->sync_mode == WB_SYNC_ALL)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* flush inline_data */
			if (is_inline_node(page)) {
				clear_inline_node(page);
				unlock_page(page);
				flush_inline_data(sbi, ino_of_node(page));
				goto lock_node;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			ret = __write_node_page(page, false, &submitted,
						wbc, do_balance, io_type);
			if (ret)
				unlock_page(page);
			else if (submitted)
				nwritten++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}
out:
	if (nwritten)
		f2fs_submit_merged_write(sbi, NODE);
	return ret;
}
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index = 0, end = ULONG_MAX;
	struct pagevec pvec;
	int ret2, ret = 0;

	pagevec_init(&pvec, 0);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_WRITEBACK,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/* until radix tree lookup accepts end_index */
			if (unlikely(page->index > end))
				continue;

			if (ino && ino_of_node(page) == ino) {
				f2fs_wait_on_page_writeback(page, NODE, true);
				if (TestClearPageError(page))
					ret = -EIO;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	ret2 = filemap_check_errors(NODE_MAPPING(sbi));
	if (!ret)
		ret = ret2;
	return ret;
}
static int f2fs_write_node_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct blk_plug plug;
	long diff;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/* balancing f2fs's metadata in background */
	f2fs_balance_fs_bg(sbi);

	/* collect a number of dirty node pages and write together */
	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, NODE);

	diff = nr_pages_to_write(sbi, NODE, wbc);
	wbc->sync_mode = WB_SYNC_NONE;
	blk_start_plug(&plug);
	sync_node_pages(sbi, wbc, true, FS_NODE_IO);
	blk_finish_plug(&plug);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
	trace_f2fs_writepages(mapping->host, wbc, NODE);
	return 0;
}
static int f2fs_set_node_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, NODE);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (!PageDirty(page)) {
		f2fs_set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}
/*
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage	= f2fs_write_node_page,
	.writepages	= f2fs_write_node_pages,
	.set_page_dirty	= f2fs_set_node_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
							nid_t n)
{
	return radix_tree_lookup(&nm_i->free_nid_root, n);
}

static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_list list, bool new)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	if (new) {
		int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
		if (err)
			return err;
	}

	f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
						i->state != NID_ALLOC);
	nm_i->nid_cnt[list]++;
	list_add_tail(&i->list, &nm_i->nid_list[list]);
	return 0;
}
static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
			struct free_nid *i, enum nid_list list, bool reuse)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
						i->state != NID_ALLOC);
	nm_i->nid_cnt[list]--;
	list_del(&i->list);
	if (!reuse)
		radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
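/*
 * Note on the free nid life cycle handled by the two helpers above: a
 * nid sits in FREE_NID_LIST in state NID_NEW until alloc_nid() moves
 * it to ALLOC_NID_LIST in state NID_ALLOC; alloc_nid_done() then frees
 * the entry, while alloc_nid_failed() puts it back on FREE_NID_LIST as
 * NID_NEW.
 */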
/* return if the nid is recognized as free */
static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *e;
	struct nat_entry *ne;
	int err = -EINVAL;
	bool ret = false;

	/* 0 nid should not be used */
	if (unlikely(nid == 0))
		return false;

	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
	i->nid = nid;
	i->state = NID_NEW;

	if (radix_tree_preload(GFP_NOFS))
		goto err;

	spin_lock(&nm_i->nid_list_lock);

	if (build) {
		/*
		 *   Thread A             Thread B
		 *  - f2fs_create
		 *   - f2fs_new_inode
		 *    - alloc_nid
		 *     - __insert_nid_to_list(ALLOC_NID_LIST)
		 *                     - f2fs_balance_fs_bg
		 *                      - build_free_nids
		 *                       - __build_free_nids
		 *                        - scan_nat_page
		 *                         - add_free_nid
		 *                          - __lookup_nat_cache
		 *  - f2fs_add_link
		 *   - init_inode_metadata
		 *    - new_inode_page
		 *     - new_node_page
		 *      - set_node_addr
		 *  - alloc_nid_done
		 *   - __remove_nid_from_list(ALLOC_NID_LIST)
		 *                         - __insert_nid_to_list(FREE_NID_LIST)
		 */
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
				nat_get_blkaddr(ne) != NULL_ADDR))
			goto err_out;

		e = __lookup_free_nid_list(nm_i, nid);
		if (e) {
			if (e->state == NID_NEW)
				ret = true;
			goto err_out;
		}
	}
	ret = true;
	err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
err_out:
	spin_unlock(&nm_i->nid_list_lock);
	radix_tree_preload_end();
err:
	if (err)
		kmem_cache_free(free_nid_slab, i);
	return ret;
}
static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	if (i && i->state == NID_NEW) {
		__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
		need_free = true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
							bool set, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
	unsigned int nid_ofs = nid - START_NID(nid);

	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	if (set)
		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
	else
		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);

	if (set)
		nm_i->free_nid_count[nat_ofs]++;
	else if (!build)
		nm_i->free_nid_count[nat_ofs]--;
}
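/*
 * Bitmap indexing example (assuming 455 NAT entries per 4KB NAT
 * block): nid 1000 belongs to NAT block 1000 / 455 = 2 and has nid_ofs
 * 1000 - 910 = 90, so its free state lives in bit 90 of
 * free_nid_bitmap[2] and is counted by free_nid_count[2].
 */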
static void scan_nat_page(struct f2fs_sb_info *sbi,
			struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
	int i;

	if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		bool freed = false;

		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
		if (blk_addr == NULL_ADDR)
			freed = add_free_nid(sbi, start_nid, true);
		spin_lock(&NM_I(sbi)->nid_list_lock);
		update_free_nid_bitmap(sbi, start_nid, freed, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
	}
}
static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	unsigned int i, idx;

	down_read(&nm_i->nat_tree_lock);

	for (i = 0; i < nm_i->nat_blocks; i++) {
		if (!test_bit_le(i, nm_i->nat_block_bitmap))
			continue;
		if (!nm_i->free_nid_count[i])
			continue;
		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
			nid_t nid;

			if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
				continue;

			nid = i * NAT_ENTRY_PER_BLOCK + idx;
			add_free_nid(sbi, nid, true);

			if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
				goto out;
		}
	}
out:
	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;
		nid_t nid;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
			add_free_nid(sbi, nid, true);
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
	up_read(&nm_i->nat_tree_lock);
}
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i = 0;
	nid_t nid = nm_i->next_scan_nid;

	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

	if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
		nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;

	/* Enough entries */
	if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
		return;

	if (!sync && !available_free_memory(sbi, FREE_NIDS))
		return;

	if (!mount) {
		/* try to find free nids in free_nid_bitmap */
		scan_free_nid_bits(sbi);

		if (nm_i->nid_cnt[FREE_NID_LIST])
			return;
	}

	/* readahead nat pages to be scanned */
	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
							META_NAT, true);

	down_read(&nm_i->nat_tree_lock);

	while (1) {
		struct page *page = get_current_nat_page(sbi, nid);

		scan_nat_page(sbi, page, nid);
		f2fs_put_page(page, 1);

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
		if (unlikely(nid >= nm_i->max_nid))
			nid = 0;

		if (++i >= FREE_NID_PAGES)
			break;
	}

	/* go to the next free nat pages to find free nids abundantly */
	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
			add_free_nid(sbi, nid, true);
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
	up_read(&nm_i->nat_tree_lock);

	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
					nm_i->ra_nid_pages, META_NAT, false);
}
void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	mutex_lock(&NM_I(sbi)->build_lock);
	__build_free_nids(sbi, sync, mount);
	mutex_unlock(&NM_I(sbi)->build_lock);
}
/*
 * If this function returns success, caller can obtain a new nid
 * from second parameter of this function.
 * The returned nid could be used as an ino as well as a nid when an
 * inode is created.
 */
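/*
 * Typical usage (a sketch mirroring the allocation path in
 * get_dnode_of_data()):
 *
 *	nid_t nid;
 *
 *	if (!alloc_nid(sbi, &nid))
 *		return -ENOSPC;
 *	page = new_node_page(dn, noffset);
 *	if (IS_ERR(page))
 *		alloc_nid_failed(sbi, nid);	(give the nid back)
 *	else
 *		alloc_nid_done(sbi, nid);	(commit the allocation)
 */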
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
retry:
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
		f2fs_show_injection_info(FAULT_ALLOC_NID);
		return false;
	}
#endif
	spin_lock(&nm_i->nid_list_lock);

	if (unlikely(nm_i->available_nids == 0)) {
		spin_unlock(&nm_i->nid_list_lock);
		return false;
	}

	/* We should not use stale free nids created by build_free_nids */
	if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
		f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
		i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
					struct free_nid, list);
		*nid = i->nid;

		__remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
		i->state = NID_ALLOC;
		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
		nm_i->available_nids--;

		update_free_nid_bitmap(sbi, *nid, false, false);

		spin_unlock(&nm_i->nid_list_lock);
		return true;
	}
	spin_unlock(&nm_i->nid_list_lock);

	/* Let's scan nat pages and its caches to get free nids */
	build_free_nids(sbi, true, false);
	goto retry;
}
/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);
	__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
	spin_unlock(&nm_i->nid_list_lock);

	kmem_cache_free(free_nid_slab, i);
}
/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
	bool need_free = false;

	if (!nid)
		return;

	spin_lock(&nm_i->nid_list_lock);
	i = __lookup_free_nid_list(nm_i, nid);
	f2fs_bug_on(sbi, !i);

	if (!available_free_memory(sbi, FREE_NIDS)) {
		__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
		need_free = true;
	} else {
		__remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
		i->state = NID_NEW;
		__insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
	}

	nm_i->available_nids++;

	update_free_nid_bitmap(sbi, nid, true, false);

	spin_unlock(&nm_i->nid_list_lock);

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
}
int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next;
	int nr = nr_shrink;

	if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
		return 0;

	if (!mutex_trylock(&nm_i->build_lock))
		return 0;

	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
									list) {
		if (nr_shrink <= 0 ||
				nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
			break;

		__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
		kmem_cache_free(free_nid_slab, i);
		nr_shrink--;
	}
	spin_unlock(&nm_i->nid_list_lock);
	mutex_unlock(&nm_i->build_lock);

	return nr - nr_shrink;
}
void recover_inline_xattr(struct inode *inode, struct page *page)
{
	void *src_addr, *dst_addr;
	size_t inline_size;
	struct page *ipage;
	struct f2fs_inode *ri;

	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
	f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));

	ri = F2FS_INODE(page);
	if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
		clear_inode_flag(inode, FI_INLINE_XATTR);
		goto update_inode;
	}

	dst_addr = inline_xattr_addr(ipage);
	src_addr = inline_xattr_addr(page);
	inline_size = inline_xattr_size(inode);

	f2fs_wait_on_page_writeback(ipage, NODE, true);
	memcpy(dst_addr, src_addr, inline_size);
update_inode:
	update_inode(inode, ipage);
	f2fs_put_page(ipage, 1);
}
int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
	nid_t new_xnid;
	struct dnode_of_data dn;
	struct node_info ni;
	struct page *xpage;

	if (!prev_xnid)
		goto recover_xnid;

	/* 1: invalidate the previous xattr nid */
	get_node_info(sbi, prev_xnid, &ni);
	f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
	invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, inode, false);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

recover_xnid:
	/* 2: update xattr nid in inode */
	if (!alloc_nid(sbi, &new_xnid))
		return -ENOSPC;

	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
	xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
	if (IS_ERR(xpage)) {
		alloc_nid_failed(sbi, new_xnid);
		return PTR_ERR(xpage);
	}

	alloc_nid_done(sbi, new_xnid);
	update_inode_page(inode);

	/* 3: update and set xattr node page dirty */
	memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);

	set_page_dirty(xpage);
	f2fs_put_page(xpage, 1);

	return 0;
}
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_inode *src, *dst;
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;

	get_node_info(sbi, ino, &old_ni);

	if (unlikely(old_ni.blk_addr != NULL_ADDR))
		return -EINVAL;
retry:
	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
	if (!ipage) {
		congestion_wait(BLK_RW_ASYNC, HZ/50);
		goto retry;
	}

	/* Should not use this inode from free nid list */
	remove_free_nid(sbi, ino);

	if (!PageUptodate(ipage))
		SetPageUptodate(ipage);
	fill_node_footer(ipage, ino, ino, 0, true);

	src = F2FS_INODE(page);
	dst = F2FS_INODE(ipage);

	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
	dst->i_size = 0;
	dst->i_blocks = cpu_to_le64(1);
	dst->i_links = cpu_to_le32(1);
	dst->i_xattr_nid = 0;
	dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
	if (dst->i_inline & F2FS_EXTRA_ATTR) {
		dst->i_extra_isize = src->i_extra_isize;
		if (f2fs_sb_has_project_quota(sbi->sb) &&
			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
								i_projid))
			dst->i_projid = src->i_projid;
	}

	new_ni = old_ni;
	new_ni.ino = ino;

	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
		WARN_ON(1);
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
	inc_valid_inode_count(sbi);
	set_page_dirty(ipage);
	f2fs_put_page(ipage, 1);

	return 0;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	block_t addr;
	int i, idx, last_offset, nrpages;

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
		nrpages = min(last_offset - i, BIO_MAX_PAGES);

		/* readahead node pages */
		ra_meta_pages(sbi, addr, nrpages, META_POR, true);

		for (idx = addr; idx < addr + nrpages; idx++) {
			struct page *page = get_tmp_page(sbi, idx);

			rn = F2FS_NODE(page);
			sum_entry->nid = rn->footer.nid;
			sum_entry->version = 0;
			sum_entry->ofs_in_node = 0;
			sum_entry++;
			f2fs_put_page(page, 1);
		}

		invalidate_mapping_pages(META_MAPPING(sbi), addr,
							addr + nrpages);
	}
	return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));

		raw_ne = nat_in_journal(journal, i);

		ne = __lookup_nat_cache(nm_i, nid);
		if (!ne) {
			ne = grab_nat_entry(nm_i, nid, true);
			node_info_from_raw_nat(&ne->ni, &raw_ne);
		}

		/*
		 * if a free nat in journal has not been used after last
		 * checkpoint, we should remove it from available nids,
		 * since later we will add it again.
		 */
		if (!get_nat_flag(ne, IS_DIRTY) &&
				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
			spin_lock(&nm_i->nid_list_lock);
			nm_i->available_nids--;
			spin_unlock(&nm_i->nid_list_lock);
		}

		__set_nat_cache_dirty(nm_i, ne);
	}
	update_nats_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
						struct list_head *head, int max)
{
	struct nat_entry_set *cur;

	if (nes->entry_cnt >= max)
		goto add_out;

	list_for_each_entry(cur, head, set_list) {
		if (cur->entry_cnt >= nes->entry_cnt) {
			list_add(&nes->set_list, cur->set_list.prev);
			return;
		}
	}
add_out:
	list_add_tail(&nes->set_list, head);
}
static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
						struct page *page)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
	struct f2fs_nat_block *nat_blk = page_address(page);
	int valid = 0;
	int i;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
		if (start_nid == 0 && i == 0)
			valid++;
		if (nat_blk->entries[i].block_addr)
			valid++;
	}
	if (valid == 0) {
		__set_bit_le(nat_index, nm_i->empty_nat_bits);
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
		return;
	}

	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
	if (valid == NAT_ENTRY_PER_BLOCK)
		__set_bit_le(nat_index, nm_i->full_nat_bits);
	else
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
}
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
		struct nat_entry_set *set, struct cp_control *cpc)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
	bool to_journal = true;
	struct f2fs_nat_block *nat_blk;
	struct nat_entry *ne, *cur;
	struct page *page = NULL;

	/*
	 * there are two steps to flush nat entries:
	 * #1, flush nat entries to journal in current hot data summary block.
	 * #2, flush nat entries to nat page.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
		to_journal = false;

	if (to_journal) {
		down_write(&curseg->journal_rwsem);
	} else {
		page = get_next_nat_page(sbi, start_nid);
		nat_blk = page_address(page);
		f2fs_bug_on(sbi, !nat_blk);
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
		struct f2fs_nat_entry *raw_ne;
		nid_t nid = nat_get_nid(ne);
		int offset;

		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);

		if (to_journal) {
			offset = lookup_journal_in_cursum(journal,
							NAT_JOURNAL, nid, 1);
			f2fs_bug_on(sbi, offset < 0);
			raw_ne = &nat_in_journal(journal, offset);
			nid_in_journal(journal, offset) = cpu_to_le32(nid);
		} else {
			raw_ne = &nat_blk->entries[nid - start_nid];
		}
		raw_nat_from_node_info(raw_ne, &ne->ni);
		nat_reset_flag(ne);
		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
		if (nat_get_blkaddr(ne) == NULL_ADDR) {
			add_free_nid(sbi, nid, false);
			spin_lock(&NM_I(sbi)->nid_list_lock);
			NM_I(sbi)->available_nids++;
			update_free_nid_bitmap(sbi, nid, true, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, nid, false, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}

	if (to_journal) {
		up_write(&curseg->journal_rwsem);
	} else {
		__update_nat_bits(sbi, start_nid, page);
		f2fs_put_page(page, 1);
	}

	/* Allow dirty nats by node block allocation in write_begin */
	if (!set->entry_cnt) {
		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
		kmem_cache_free(nat_entry_set_slab, set);
	}
}
/*
 * This function is called during the checkpointing process.
 */
void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct nat_entry_set *setvec[SETVEC_SIZE];
	struct nat_entry_set *set, *tmp;
	unsigned int found;
	nid_t set_idx = 0;
	LIST_HEAD(sets);

	if (!nm_i->dirty_nat_cnt)
		return;

	down_write(&nm_i->nat_tree_lock);

	/*
	 * if there is not enough space in the journal to store all dirty
	 * nat entries, remove all entries from the journal and merge them
	 * into the nat entry set.
	 */
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
		remove_nats_in_journal(sbi);

	while ((found = __gang_lookup_nat_set(nm_i,
					set_idx, SETVEC_SIZE, setvec))) {
		unsigned idx;

		set_idx = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++)
			__adjust_nat_entry_set(setvec[idx], &sets,
						MAX_NAT_JENTRIES(journal));
	}

	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(set, tmp, &sets, set_list)
		__flush_nat_entry_set(sbi, set, cpc);

	up_write(&nm_i->nat_tree_lock);
	/* Allow dirty nats by node block allocation in write_begin */
}
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
	unsigned int i;
	__u64 cp_ver = cur_cp_version(ckpt);
	block_t nat_bits_addr;

	if (!enabled_nat_bits(sbi, NULL))
		return 0;

	nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 +
						F2FS_BLKSIZE - 1);
	nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS,
						GFP_KERNEL);
	if (!nm_i->nat_bits)
		return -ENOMEM;

	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
						nm_i->nat_bits_blocks;
	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
		struct page *page = get_meta_page(sbi, nat_bits_addr++);

		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
					page_address(page), F2FS_BLKSIZE);
		f2fs_put_page(page, 1);
	}

	cp_ver |= (cur_cp_crc(ckpt) << 32);
	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
		disable_nat_bits(sbi, true);
		return 0;
	}

	nm_i->full_nat_bits = nm_i->nat_bits + 8;
	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;

	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
	return 0;
}
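/*
 * Layout of the nat_bits area as decoded above:
 *
 *	[ cp_ver | crc (8 bytes) ][ full_nat_bits ][ empty_nat_bits ]
 *
 * where each bitmap carries one bit per NAT block, i.e.
 * nat_blocks / BITS_PER_BYTE bytes.
 */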
static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i = 0;
	nid_t nid, last_nid;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);

		nid = i * NAT_ENTRY_PER_BLOCK;
		last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;

		spin_lock(&NM_I(sbi)->nid_list_lock);
		for (; nid < last_nid; nid++)
			update_free_nid_bitmap(sbi, nid, true, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
	}

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);
	}
}
static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
	unsigned int nat_segs;
	int err;

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes both segments of each pair, so divide by 2 */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;

	/* not used nids: 0, node, meta, (and root counted as valid node) */
	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
							F2FS_RESERVED_NODE_NUM;
	nm_i->nid_cnt[FREE_NID_LIST] = 0;
	nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
	nm_i->nat_cnt = 0;
	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;

	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
	INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
	INIT_LIST_HEAD(&nm_i->nat_entries);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->nid_list_lock);
	init_rwsem(&nm_i->nat_tree_lock);

	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;

	err = __get_nat_bitmaps(sbi);
	if (err)
		return err;

#ifdef CONFIG_F2FS_CHECK_FS
	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap_mir)
		return -ENOMEM;
#endif

	return 0;
}
static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks *
					NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
	if (!nm_i->free_nid_bitmap)
		return -ENOMEM;

	nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8,
								GFP_KERNEL);
	if (!nm_i->nat_block_bitmap)
		return -ENOMEM;

	nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks *
					sizeof(unsigned short), GFP_KERNEL);
	if (!nm_i->free_nid_count)
		return -ENOMEM;
	return 0;
}
int build_node_manager(struct f2fs_sb_info *sbi)
{
	int err;

	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

	err = init_free_nid_cache(sbi);
	if (err)
		return err;

	/* load free nid status from nat_bits table */
	load_free_nid_bitmap(sbi);

	build_free_nids(sbi, true, true);
	return 0;
}
void destroy_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
	struct nat_entry_set *setvec[SETVEC_SIZE];
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
									list) {
		__remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
		spin_unlock(&nm_i->nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		spin_lock(&nm_i->nid_list_lock);
	}
	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
	f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
	f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
	spin_unlock(&nm_i->nid_list_lock);

	/* destroy nat cache */
	down_write(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
					nid, NATVEC_SIZE, natvec))) {
		unsigned idx;

		nid = nat_get_nid(natvec[found - 1]) + 1;
		for (idx = 0; idx < found; idx++)
			__del_from_nat_cache(nm_i, natvec[idx]);
	}
	f2fs_bug_on(sbi, nm_i->nat_cnt);

	/* destroy nat set cache */
	nid = 0;
	while ((found = __gang_lookup_nat_set(nm_i,
					nid, SETVEC_SIZE, setvec))) {
		unsigned idx;

		nid = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++) {
			/* entry_cnt is not zero when a cp_error has occurred */
			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
		}
	}
	up_write(&nm_i->nat_tree_lock);

	kvfree(nm_i->nat_block_bitmap);
	kvfree(nm_i->free_nid_bitmap);
	kvfree(nm_i->free_nid_count);

	kfree(nm_i->nat_bitmap);
	kfree(nm_i->nat_bits);
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(nm_i->nat_bitmap_mir);
#endif
	sbi->nm_info = NULL;
	kfree(nm_i);
}
int __init create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
			sizeof(struct nat_entry));
	if (!nat_entry_slab)
		goto fail;

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
			sizeof(struct free_nid));
	if (!free_nid_slab)
		goto destroy_nat_entry;

	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
			sizeof(struct nat_entry_set));
	if (!nat_entry_set_slab)
		goto destroy_free_nid;
	return 0;

destroy_free_nid:
	kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
	kmem_cache_destroy(nat_entry_slab);
fail:
	return -ENOMEM;
}
void destroy_node_manager_caches(void)
{
	kmem_cache_destroy(nat_entry_set_slab);
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}