1 /*
2  * fs/f2fs/checkpoint.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/bio.h>
13 #include <linux/mpage.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/f2fs_fs.h>
17 #include <linux/pagevec.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "node.h"
22 #include "segment.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 static struct kmem_cache *ino_entry_slab;
27 struct kmem_cache *inode_entry_slab;
28
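/*
 * Record a checkpoint error and force the filesystem read-only; when not
 * called from an end_io path, also flush any merged writes.
 */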
29 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
30 {
31         set_ckpt_flags(sbi, CP_ERROR_FLAG);
32         sbi->sb->s_flags |= MS_RDONLY;
33         if (!end_io)
34                 f2fs_flush_merged_writes(sbi);
35 }
36
37 /*
38  * We guarantee no failure on the returned page.
39  */
40 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
41 {
42         struct address_space *mapping = META_MAPPING(sbi);
43         struct page *page = NULL;
44 repeat:
45         page = f2fs_grab_cache_page(mapping, index, false);
46         if (!page) {
47                 cond_resched();
48                 goto repeat;
49         }
50         f2fs_wait_on_page_writeback(page, META, true);
51         if (!PageUptodate(page))
52                 SetPageUptodate(page);
53         return page;
54 }
55
56 /*
57  * We guarantee no failure on the returned page.
58  */
59 static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
60                                                         bool is_meta)
61 {
62         struct address_space *mapping = META_MAPPING(sbi);
63         struct page *page;
64         struct f2fs_io_info fio = {
65                 .sbi = sbi,
66                 .type = META,
67                 .op = REQ_OP_READ,
68                 .op_flags = REQ_META | REQ_PRIO,
69                 .old_blkaddr = index,
70                 .new_blkaddr = index,
71                 .encrypted_page = NULL,
72                 .is_meta = is_meta,
73         };
74
75         if (unlikely(!is_meta))
76                 fio.op_flags &= ~REQ_META;
77 repeat:
78         page = f2fs_grab_cache_page(mapping, index, false);
79         if (!page) {
80                 cond_resched();
81                 goto repeat;
82         }
83         if (PageUptodate(page))
84                 goto out;
85
86         fio.page = page;
87
88         if (f2fs_submit_page_bio(&fio)) {
89                 memset(page_address(page), 0, PAGE_SIZE);
90                 f2fs_stop_checkpoint(sbi, false);
91                 f2fs_bug_on(sbi, 1);
92                 return page;
93         }
94
95         lock_page(page);
96         if (unlikely(page->mapping != mapping)) {
97                 f2fs_put_page(page, 1);
98                 goto repeat;
99         }
100
101         /*
102          * if there is any IO error when accessing the device, make our
103          * filesystem readonly and make sure we do not write a checkpoint
104          * with a non-uptodate meta page.
105          */
106         if (unlikely(!PageUptodate(page)))
107                 f2fs_stop_checkpoint(sbi, false);
108 out:
109         return page;
110 }
111
112 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
113 {
114         return __get_meta_page(sbi, index, true);
115 }
116
117 /* for POR only */
118 struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
119 {
120         return __get_meta_page(sbi, index, false);
121 }
122
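/*
 * Check that @blkaddr lies inside the on-disk area expected for @type
 * (NAT, SIT, SSA, CP, main area or the whole meta area).
 */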
123 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
124                                         block_t blkaddr, int type)
125 {
126         switch (type) {
127         case META_NAT:
128                 break;
129         case META_SIT:
130                 if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
131                         return false;
132                 break;
133         case META_SSA:
134                 if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
135                         blkaddr < SM_I(sbi)->ssa_blkaddr))
136                         return false;
137                 break;
138         case META_CP:
139                 if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
140                         blkaddr < __start_cp_addr(sbi)))
141                         return false;
142                 break;
143         case META_POR:
144         case DATA_GENERIC:
145                 if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
146                         blkaddr < MAIN_BLKADDR(sbi))) {
147                         if (type == DATA_GENERIC) {
148                                 f2fs_msg(sbi->sb, KERN_WARNING,
149                                         "access invalid blkaddr:%u", blkaddr);
150                                 WARN_ON(1);
151                         }
152                         return false;
153                 }
154                 break;
155         case META_GENERIC:
156                 if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
157                         blkaddr >= MAIN_BLKADDR(sbi)))
158                         return false;
159                 break;
160         default:
161                 BUG();
162         }
163
164         return true;
165 }
166
167 /*
168  * Readahead CP/NAT/SIT/SSA pages
169  */
170 int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
171                                                         int type, bool sync)
172 {
173         struct page *page;
174         block_t blkno = start;
175         struct f2fs_io_info fio = {
176                 .sbi = sbi,
177                 .type = META,
178                 .op = REQ_OP_READ,
179                 .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
180                 .encrypted_page = NULL,
181                 .in_list = false,
182                 .is_meta = (type != META_POR),
183         };
184         struct blk_plug plug;
185
186         if (unlikely(type == META_POR))
187                 fio.op_flags &= ~REQ_META;
188
189         blk_start_plug(&plug);
190         for (; nrpages-- > 0; blkno++) {
191
192                 if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
193                         goto out;
194
195                 switch (type) {
196                 case META_NAT:
197                         if (unlikely(blkno >=
198                                         NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
199                                 blkno = 0;
200                         /* get nat block addr */
201                         fio.new_blkaddr = current_nat_addr(sbi,
202                                         blkno * NAT_ENTRY_PER_BLOCK);
203                         break;
204                 case META_SIT:
205                         if (unlikely(blkno >= TOTAL_SEGS(sbi)))
206                                 goto out;
207                         /* get sit block addr */
208                         fio.new_blkaddr = current_sit_addr(sbi,
209                                         blkno * SIT_ENTRY_PER_BLOCK);
210                         break;
211                 case META_SSA:
212                 case META_CP:
213                 case META_POR:
214                         fio.new_blkaddr = blkno;
215                         break;
216                 default:
217                         BUG();
218                 }
219
220                 page = f2fs_grab_cache_page(META_MAPPING(sbi),
221                                                 fio.new_blkaddr, false);
222                 if (!page)
223                         continue;
224                 if (PageUptodate(page)) {
225                         f2fs_put_page(page, 1);
226                         continue;
227                 }
228
229                 fio.page = page;
230                 f2fs_submit_page_bio(&fio);
231                 f2fs_put_page(page, 0);
232         }
233 out:
234         blk_finish_plug(&plug);
235         return blkno - start;
236 }
237
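/* Readahead meta pages for POR only if @index is not cached and uptodate. */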
238 void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
239 {
240         struct page *page;
241         bool readahead = false;
242
243         page = find_get_page(META_MAPPING(sbi), index);
244         if (!page || !PageUptodate(page))
245                 readahead = true;
246         f2fs_put_page(page, 0);
247
248         if (readahead)
249                 ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
250 }
251
252 static int __f2fs_write_meta_page(struct page *page,
253                                 struct writeback_control *wbc,
254                                 enum iostat_type io_type)
255 {
256         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
257
258         trace_f2fs_writepage(page, META);
259
260         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
261                 goto redirty_out;
262         if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
263                 goto redirty_out;
264         if (unlikely(f2fs_cp_error(sbi)))
265                 goto redirty_out;
266
267         write_meta_page(sbi, page, io_type);
268         dec_page_count(sbi, F2FS_DIRTY_META);
269
270         if (wbc->for_reclaim)
271                 f2fs_submit_merged_write_cond(sbi, page->mapping->host,
272                                                 0, page->index, META);
273
274         unlock_page(page);
275
276         if (unlikely(f2fs_cp_error(sbi)))
277                 f2fs_submit_merged_write(sbi, META);
278
279         return 0;
280
281 redirty_out:
282         redirty_page_for_writepage(wbc, page);
283         return AOP_WRITEPAGE_ACTIVATE;
284 }
285
286 static int f2fs_write_meta_page(struct page *page,
287                                 struct writeback_control *wbc)
288 {
289         return __f2fs_write_meta_page(page, wbc, FS_META_IO);
290 }
291
292 static int f2fs_write_meta_pages(struct address_space *mapping,
293                                 struct writeback_control *wbc)
294 {
295         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
296         long diff, written;
297
298         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
299                 goto skip_write;
300
301         /* collect a number of dirty meta pages and write them together */
302         if (wbc->for_kupdate ||
303                 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
304                 goto skip_write;
305
306         /* if trylock fails, cp will flush dirty pages instead */
307         if (!mutex_trylock(&sbi->cp_mutex))
308                 goto skip_write;
309
310         trace_f2fs_writepages(mapping->host, wbc, META);
311         diff = nr_pages_to_write(sbi, META, wbc);
312         written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
313         mutex_unlock(&sbi->cp_mutex);
314         wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
315         return 0;
316
317 skip_write:
318         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
319         trace_f2fs_writepages(mapping->host, wbc, META);
320         return 0;
321 }
322
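/*
 * Write back dirty meta pages; when @nr_to_write is bounded, stop at the
 * first non-contiguous page index and submit the merged write.
 */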
323 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
324                                 long nr_to_write, enum iostat_type io_type)
325 {
326         struct address_space *mapping = META_MAPPING(sbi);
327         pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
328         struct pagevec pvec;
329         long nwritten = 0;
330         struct writeback_control wbc = {
331                 .for_reclaim = 0,
332         };
333         struct blk_plug plug;
334
335         pagevec_init(&pvec, 0);
336
337         blk_start_plug(&plug);
338
339         while (index <= end) {
340                 int i, nr_pages;
341                 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
342                                 PAGECACHE_TAG_DIRTY,
343                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
344                 if (unlikely(nr_pages == 0))
345                         break;
346
347                 for (i = 0; i < nr_pages; i++) {
348                         struct page *page = pvec.pages[i];
349
350                         if (prev == ULONG_MAX)
351                                 prev = page->index - 1;
352                         if (nr_to_write != LONG_MAX && page->index != prev + 1) {
353                                 pagevec_release(&pvec);
354                                 goto stop;
355                         }
356
357                         lock_page(page);
358
359                         if (unlikely(page->mapping != mapping)) {
360 continue_unlock:
361                                 unlock_page(page);
362                                 continue;
363                         }
364                         if (!PageDirty(page)) {
365                                 /* someone wrote it for us */
366                                 goto continue_unlock;
367                         }
368
369                         f2fs_wait_on_page_writeback(page, META, true);
370
371                         BUG_ON(PageWriteback(page));
372                         if (!clear_page_dirty_for_io(page))
373                                 goto continue_unlock;
374
375                         if (__f2fs_write_meta_page(page, &wbc, io_type)) {
376                                 unlock_page(page);
377                                 break;
378                         }
379                         nwritten++;
380                         prev = page->index;
381                         if (unlikely(nwritten >= nr_to_write))
382                                 break;
383                 }
384                 pagevec_release(&pvec);
385                 cond_resched();
386         }
387 stop:
388         if (nwritten)
389                 f2fs_submit_merged_write(sbi, type);
390
391         blk_finish_plug(&plug);
392
393         return nwritten;
394 }
395
396 static int f2fs_set_meta_page_dirty(struct page *page)
397 {
398         trace_f2fs_set_page_dirty(page, META);
399
400         if (!PageUptodate(page))
401                 SetPageUptodate(page);
402         if (!PageDirty(page)) {
403                 f2fs_set_page_dirty_nobuffers(page);
404                 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
405                 SetPagePrivate(page);
406                 f2fs_trace_pid(page);
407                 return 1;
408         }
409         return 0;
410 }
411
412 const struct address_space_operations f2fs_meta_aops = {
413         .writepage      = f2fs_write_meta_page,
414         .writepages     = f2fs_write_meta_pages,
415         .set_page_dirty = f2fs_set_meta_page_dirty,
416         .invalidatepage = f2fs_invalidate_page,
417         .releasepage    = f2fs_release_page,
418 #ifdef CONFIG_MIGRATION
419         .migratepage    = f2fs_migrate_page,
420 #endif
421 };
422
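/*
 * Insert @ino into the per-type radix tree and ino list, reusing an
 * existing entry if one is already present.
 */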
423 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
424 {
425         struct inode_management *im = &sbi->im[type];
426         struct ino_entry *e, *tmp;
427
428         tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
429 retry:
430         radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
431
432         spin_lock(&im->ino_lock);
433         e = radix_tree_lookup(&im->ino_root, ino);
434         if (!e) {
435                 e = tmp;
436                 if (radix_tree_insert(&im->ino_root, ino, e)) {
437                         spin_unlock(&im->ino_lock);
438                         radix_tree_preload_end();
439                         goto retry;
440                 }
441                 memset(e, 0, sizeof(struct ino_entry));
442                 e->ino = ino;
443
444                 list_add_tail(&e->list, &im->ino_list);
445                 if (type != ORPHAN_INO)
446                         im->ino_num++;
447         }
448         spin_unlock(&im->ino_lock);
449         radix_tree_preload_end();
450
451         if (e != tmp)
452                 kmem_cache_free(ino_entry_slab, tmp);
453 }
454
455 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
456 {
457         struct inode_management *im = &sbi->im[type];
458         struct ino_entry *e;
459
460         spin_lock(&im->ino_lock);
461         e = radix_tree_lookup(&im->ino_root, ino);
462         if (e) {
463                 list_del(&e->list);
464                 radix_tree_delete(&im->ino_root, ino);
465                 im->ino_num--;
466                 spin_unlock(&im->ino_lock);
467                 kmem_cache_free(ino_entry_slab, e);
468                 return;
469         }
470         spin_unlock(&im->ino_lock);
471 }
472
473 void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
474 {
475         /* add new dirty ino entry into list */
476         __add_ino_entry(sbi, ino, type);
477 }
478
479 void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
480 {
481         /* remove dirty ino entry from list */
482         __remove_ino_entry(sbi, ino, type);
483 }
484
485 /* mode should be APPEND_INO or UPDATE_INO */
486 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
487 {
488         struct inode_management *im = &sbi->im[mode];
489         struct ino_entry *e;
490
491         spin_lock(&im->ino_lock);
492         e = radix_tree_lookup(&im->ino_root, ino);
493         spin_unlock(&im->ino_lock);
494         return e ? true : false;
495 }
496
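/* Drop all cached ino entries; include orphan entries only when @all is set. */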
497 void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
498 {
499         struct ino_entry *e, *tmp;
500         int i;
501
502         for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
503                 struct inode_management *im = &sbi->im[i];
504
505                 spin_lock(&im->ino_lock);
506                 list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
507                         list_del(&e->list);
508                         radix_tree_delete(&im->ino_root, e->ino);
509                         kmem_cache_free(ino_entry_slab, e);
510                         im->ino_num--;
511                 }
512                 spin_unlock(&im->ino_lock);
513         }
514 }
515
516 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
517 {
518         struct inode_management *im = &sbi->im[ORPHAN_INO];
519         int err = 0;
520
521         spin_lock(&im->ino_lock);
522
523 #ifdef CONFIG_F2FS_FAULT_INJECTION
524         if (time_to_inject(sbi, FAULT_ORPHAN)) {
525                 spin_unlock(&im->ino_lock);
526                 f2fs_show_injection_info(FAULT_ORPHAN);
527                 return -ENOSPC;
528         }
529 #endif
530         if (unlikely(im->ino_num >= sbi->max_orphans))
531                 err = -ENOSPC;
532         else
533                 im->ino_num++;
534         spin_unlock(&im->ino_lock);
535
536         return err;
537 }
538
539 void release_orphan_inode(struct f2fs_sb_info *sbi)
540 {
541         struct inode_management *im = &sbi->im[ORPHAN_INO];
542
543         spin_lock(&im->ino_lock);
544         f2fs_bug_on(sbi, im->ino_num == 0);
545         im->ino_num--;
546         spin_unlock(&im->ino_lock);
547 }
548
549 void add_orphan_inode(struct inode *inode)
550 {
551         /* add new orphan ino entry into list */
552         __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
553         update_inode_page(inode);
554 }
555
556 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
557 {
558         /* remove orphan entry from orphan list */
559         __remove_ino_entry(sbi, ino, ORPHAN_INO);
560 }
561
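/*
 * Drop one on-disk orphan: load the inode, clear its link count and let
 * iput() truncate and free it.
 */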
562 static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
563 {
564         struct inode *inode;
565         struct node_info ni;
566         int err = acquire_orphan_inode(sbi);
567
568         if (err) {
569                 set_sbi_flag(sbi, SBI_NEED_FSCK);
570                 f2fs_msg(sbi->sb, KERN_WARNING,
571                                 "%s: orphan failed (ino=%x), run fsck to fix.",
572                                 __func__, ino);
573                 return err;
574         }
575
576         __add_ino_entry(sbi, ino, ORPHAN_INO);
577
578         inode = f2fs_iget_retry(sbi->sb, ino);
579         if (IS_ERR(inode)) {
580                 /*
581                  * it is a bug if we cannot find the inode that this
582                  * orphan entry refers to.
583                  */
584                 f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
585                 return PTR_ERR(inode);
586         }
587
588         clear_nlink(inode);
589
590         /* truncate all the data during iput */
591         iput(inode);
592
593         get_node_info(sbi, ino, &ni);
594
595         /* ENOMEM was fully retried in f2fs_evict_inode. */
596         if (ni.blk_addr != NULL_ADDR) {
597                 set_sbi_flag(sbi, SBI_NEED_FSCK);
598                 f2fs_msg(sbi->sb, KERN_WARNING,
599                         "%s: orphan failed (ino=%x) by kernel, retry mount.",
600                                 __func__, ino);
601                 return -EIO;
602         }
603         __remove_ino_entry(sbi, ino, ORPHAN_INO);
604         return 0;
605 }
606
607 int recover_orphan_inodes(struct f2fs_sb_info *sbi)
608 {
609         block_t start_blk, orphan_blocks, i, j;
610         unsigned int s_flags = sbi->sb->s_flags;
611         int err = 0;
612
613         if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
614                 return 0;
615
616         if (s_flags & MS_RDONLY) {
617                 f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
618                 sbi->sb->s_flags &= ~MS_RDONLY;
619         }
620
621 #ifdef CONFIG_QUOTA
622         /* Needed for iput() to work correctly and not trash data */
623         sbi->sb->s_flags |= MS_ACTIVE;
624         /* Turn on quotas so that they are updated correctly */
625         f2fs_enable_quota_files(sbi);
626 #endif
627
628         start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
629         orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
630
631         ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
632
633         for (i = 0; i < orphan_blocks; i++) {
634                 struct page *page = get_meta_page(sbi, start_blk + i);
635                 struct f2fs_orphan_block *orphan_blk;
636
637                 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
638                 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
639                         nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
640                         err = recover_orphan_inode(sbi, ino);
641                         if (err) {
642                                 f2fs_put_page(page, 1);
643                                 goto out;
644                         }
645                 }
646                 f2fs_put_page(page, 1);
647         }
648         /* clear Orphan Flag */
649         clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
650 out:
651 #ifdef CONFIG_QUOTA
652         /* Turn quotas off */
653         f2fs_quota_off_umount(sbi->sb);
654 #endif
655         sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
656
657         return err;
658 }
659
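/* Pack the in-memory orphan list into orphan blocks starting at @start_blk. */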
660 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
661 {
662         struct list_head *head;
663         struct f2fs_orphan_block *orphan_blk = NULL;
664         unsigned int nentries = 0;
665         unsigned short index = 1;
666         unsigned short orphan_blocks;
667         struct page *page = NULL;
668         struct ino_entry *orphan = NULL;
669         struct inode_management *im = &sbi->im[ORPHAN_INO];
670
671         orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
672
673         /*
674          * we don't need to do spin_lock(&im->ino_lock) here, since all the
675          * orphan inode operations are covered under f2fs_lock_op().
676          * And, spin_lock should be avoided due to page operations below.
677          */
678         head = &im->ino_list;
679
680         /* loop over each orphan inode entry and write it into the journal block */
681         list_for_each_entry(orphan, head, list) {
682                 if (!page) {
683                         page = grab_meta_page(sbi, start_blk++);
684                         orphan_blk =
685                                 (struct f2fs_orphan_block *)page_address(page);
686                         memset(orphan_blk, 0, sizeof(*orphan_blk));
687                 }
688
689                 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
690
691                 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
692                         /*
693                          * when an orphan block is full of 1020 entries,
694                          * we need to flush the current orphan block
695                          * and bring another one into memory
696                          */
697                         orphan_blk->blk_addr = cpu_to_le16(index);
698                         orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
699                         orphan_blk->entry_count = cpu_to_le32(nentries);
700                         set_page_dirty(page);
701                         f2fs_put_page(page, 1);
702                         index++;
703                         nentries = 0;
704                         page = NULL;
705                 }
706         }
707
708         if (page) {
709                 orphan_blk->blk_addr = cpu_to_le16(index);
710                 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
711                 orphan_blk->entry_count = cpu_to_le32(nentries);
712                 set_page_dirty(page);
713                 f2fs_put_page(page, 1);
714         }
715 }
716
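/* Read one checkpoint block, verify its CRC and return its version. */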
717 static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
718                 struct f2fs_checkpoint **cp_block, struct page **cp_page,
719                 unsigned long long *version)
720 {
721         unsigned long blk_size = sbi->blocksize;
722         size_t crc_offset = 0;
723         __u32 crc = 0;
724
725         *cp_page = get_meta_page(sbi, cp_addr);
726         *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
727
728         crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
729         if (crc_offset > (blk_size - sizeof(__le32))) {
730                 f2fs_put_page(*cp_page, 1);
731                 f2fs_msg(sbi->sb, KERN_WARNING,
732                         "invalid crc_offset: %zu", crc_offset);
733                 return -EINVAL;
734         }
735
736         crc = cur_cp_crc(*cp_block);
737         if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
738                 f2fs_put_page(*cp_page, 1);
739                 f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
740                 return -EINVAL;
741         }
742
743         *version = cur_cp_version(*cp_block);
744         return 0;
745 }
746
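/*
 * A checkpoint pack is valid only when the versions stored in its first
 * and last blocks match; return the first block's page on success.
 */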
747 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
748                                 block_t cp_addr, unsigned long long *version)
749 {
750         struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
751         struct f2fs_checkpoint *cp_block = NULL;
752         unsigned long long cur_version = 0, pre_version = 0;
753         int err;
754
755         err = get_checkpoint_version(sbi, cp_addr, &cp_block,
756                                         &cp_page_1, version);
757         if (err)
758                 return NULL;
759
760         if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
761                                         sbi->blocks_per_seg) {
762                 f2fs_msg(sbi->sb, KERN_WARNING,
763                         "invalid cp_pack_total_block_count:%u",
764                         le32_to_cpu(cp_block->cp_pack_total_block_count));
765                 goto invalid_cp;
766         }
767         pre_version = *version;
768
769         cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
770         err = get_checkpoint_version(sbi, cp_addr, &cp_block,
771                                         &cp_page_2, version);
772         if (err)
773                 goto invalid_cp;
774         cur_version = *version;
775
776         if (cur_version == pre_version) {
777                 *version = cur_version;
778                 f2fs_put_page(cp_page_2, 1);
779                 return cp_page_1;
780         }
781         f2fs_put_page(cp_page_2, 1);
782 invalid_cp:
783         f2fs_put_page(cp_page_1, 1);
784         return NULL;
785 }
786
787 int get_valid_checkpoint(struct f2fs_sb_info *sbi)
788 {
789         struct f2fs_checkpoint *cp_block;
790         struct f2fs_super_block *fsb = sbi->raw_super;
791         struct page *cp1, *cp2, *cur_page;
792         unsigned long blk_size = sbi->blocksize;
793         unsigned long long cp1_version = 0, cp2_version = 0;
794         unsigned long long cp_start_blk_no;
795         unsigned int cp_blks = 1 + __cp_payload(sbi);
796         block_t cp_blk_no;
797         int i;
798         int err;
799
800         sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
801         if (!sbi->ckpt)
802                 return -ENOMEM;
803         /*
804          * Finding the valid cp block involves reading both
805          * sets (cp pack 1 and cp pack 2)
806          */
807         cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
808         cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
809
810         /* The second checkpoint pack should start at the next segment */
811         cp_start_blk_no += ((unsigned long long)1) <<
812                                 le32_to_cpu(fsb->log_blocks_per_seg);
813         cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
814
815         if (cp1 && cp2) {
816                 if (ver_after(cp2_version, cp1_version))
817                         cur_page = cp2;
818                 else
819                         cur_page = cp1;
820         } else if (cp1) {
821                 cur_page = cp1;
822         } else if (cp2) {
823                 cur_page = cp2;
824         } else {
825                 err = -EFSCORRUPTED;
826                 goto fail_no_cp;
827         }
828
829         cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
830         memcpy(sbi->ckpt, cp_block, blk_size);
831
832         if (cur_page == cp1)
833                 sbi->cur_cp_pack = 1;
834         else
835                 sbi->cur_cp_pack = 2;
836
837         /* Sanity checking of checkpoint */
838         if (sanity_check_ckpt(sbi)) {
839                 err = -EFSCORRUPTED;
840                 goto free_fail_no_cp;
841         }
842
843         if (cp_blks <= 1)
844                 goto done;
845
846         cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
847         if (cur_page == cp2)
848                 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
849
850         for (i = 1; i < cp_blks; i++) {
851                 void *sit_bitmap_ptr;
852                 unsigned char *ckpt = (unsigned char *)sbi->ckpt;
853
854                 cur_page = get_meta_page(sbi, cp_blk_no + i);
855                 sit_bitmap_ptr = page_address(cur_page);
856                 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
857                 f2fs_put_page(cur_page, 1);
858         }
859 done:
860         f2fs_put_page(cp1, 1);
861         f2fs_put_page(cp2, 1);
862         return 0;
863
864 free_fail_no_cp:
865         f2fs_put_page(cp1, 1);
866         f2fs_put_page(cp2, 1);
867 fail_no_cp:
868         kfree(sbi->ckpt);
869         return err;
870 }
871
872 static void __add_dirty_inode(struct inode *inode, enum inode_type type)
873 {
874         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
875         int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
876
877         if (is_inode_flag_set(inode, flag))
878                 return;
879
880         set_inode_flag(inode, flag);
881         if (!f2fs_is_volatile_file(inode))
882                 list_add_tail(&F2FS_I(inode)->dirty_list,
883                                                 &sbi->inode_list[type]);
884         stat_inc_dirty_inode(sbi, type);
885 }
886
887 static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
888 {
889         int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
890
891         if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
892                 return;
893
894         list_del_init(&F2FS_I(inode)->dirty_list);
895         clear_inode_flag(inode, flag);
896         stat_dec_dirty_inode(F2FS_I_SB(inode), type);
897 }
898
899 void update_dirty_page(struct inode *inode, struct page *page)
900 {
901         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
902         enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
903
904         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
905                         !S_ISLNK(inode->i_mode))
906                 return;
907
908         spin_lock(&sbi->inode_lock[type]);
909         if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
910                 __add_dirty_inode(inode, type);
911         inode_inc_dirty_pages(inode);
912         spin_unlock(&sbi->inode_lock[type]);
913
914         SetPagePrivate(page);
915         f2fs_trace_pid(page);
916 }
917
918 void remove_dirty_inode(struct inode *inode)
919 {
920         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
921         enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
922
923         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
924                         !S_ISLNK(inode->i_mode))
925                 return;
926
927         if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
928                 return;
929
930         spin_lock(&sbi->inode_lock[type]);
931         __remove_dirty_inode(inode, type);
932         spin_unlock(&sbi->inode_lock[type]);
933 }
934
935 int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
936 {
937         struct list_head *head;
938         struct inode *inode;
939         struct f2fs_inode_info *fi;
940         bool is_dir = (type == DIR_INODE);
941         unsigned long ino = 0;
942
943         trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
944                                 get_pages(sbi, is_dir ?
945                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
946 retry:
947         if (unlikely(f2fs_cp_error(sbi))) {
948                 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
949                                 get_pages(sbi, is_dir ?
950                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
951                 return -EIO;
952         }
953
954         spin_lock(&sbi->inode_lock[type]);
955
956         head = &sbi->inode_list[type];
957         if (list_empty(head)) {
958                 spin_unlock(&sbi->inode_lock[type]);
959                 trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
960                                 get_pages(sbi, is_dir ?
961                                 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
962                 return 0;
963         }
964         fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
965         inode = igrab(&fi->vfs_inode);
966         spin_unlock(&sbi->inode_lock[type]);
967         if (inode) {
968                 unsigned long cur_ino = inode->i_ino;
969
970                 if (is_dir)
971                         F2FS_I(inode)->cp_task = current;
972
973                 filemap_fdatawrite(inode->i_mapping);
974
975                 if (is_dir)
976                         F2FS_I(inode)->cp_task = NULL;
977
978                 iput(inode);
979                 /* We need to give the cpu to other writers. */
980                 if (ino == cur_ino) {
981                         congestion_wait(BLK_RW_ASYNC, HZ/50);
982                         cond_resched();
983                 } else {
984                         ino = cur_ino;
985                 }
986         } else {
987                 /*
988                  * We should submit the bio, since there exist several
989                  * writebacking dentry pages in the freeing inode.
990                  */
991                 f2fs_submit_merged_write(sbi, DATA);
992                 cond_resched();
993         }
994         goto retry;
995 }
996
997 int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
998 {
999         struct list_head *head = &sbi->inode_list[DIRTY_META];
1000         struct inode *inode;
1001         struct f2fs_inode_info *fi;
1002         s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);
1003
1004         while (total--) {
1005                 if (unlikely(f2fs_cp_error(sbi)))
1006                         return -EIO;
1007
1008                 spin_lock(&sbi->inode_lock[DIRTY_META]);
1009                 if (list_empty(head)) {
1010                         spin_unlock(&sbi->inode_lock[DIRTY_META]);
1011                         return 0;
1012                 }
1013                 fi = list_first_entry(head, struct f2fs_inode_info,
1014                                                         gdirty_list);
1015                 inode = igrab(&fi->vfs_inode);
1016                 spin_unlock(&sbi->inode_lock[DIRTY_META]);
1017                 if (inode) {
1018                         sync_inode_metadata(inode, 0);
1019
1020                         /* it's on eviction */
1021                         if (is_inode_flag_set(inode, FI_DIRTY_INODE))
1022                                 update_inode_page(inode);
1023                         iput(inode);
1024                 }
1025         };
1026         return 0;
1027 }
1028
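/* Snapshot block/node/inode counts and the next free nid into the checkpoint. */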
1029 static void __prepare_cp_block(struct f2fs_sb_info *sbi)
1030 {
1031         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1032         struct f2fs_nm_info *nm_i = NM_I(sbi);
1033         nid_t last_nid = nm_i->next_scan_nid;
1034
1035         next_free_nid(sbi, &last_nid);
1036         ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
1037         ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
1038         ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
1039         ckpt->next_free_nid = cpu_to_le32(last_nid);
1040 }
1041
1042 /*
1043  * Freeze all the FS-operations for checkpoint.
1044  */
1045 static int block_operations(struct f2fs_sb_info *sbi)
1046 {
1047         struct writeback_control wbc = {
1048                 .sync_mode = WB_SYNC_ALL,
1049                 .nr_to_write = LONG_MAX,
1050                 .for_reclaim = 0,
1051         };
1052         struct blk_plug plug;
1053         int err = 0;
1054
1055         blk_start_plug(&plug);
1056
1057 retry_flush_dents:
1058         f2fs_lock_all(sbi);
1059         /* write all the dirty dentry pages */
1060         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
1061                 f2fs_unlock_all(sbi);
1062                 err = sync_dirty_inodes(sbi, DIR_INODE);
1063                 if (err)
1064                         goto out;
1065                 cond_resched();
1066                 goto retry_flush_dents;
1067         }
1068
1069         /*
1070          * POR: we should ensure that there are no dirty node pages
1071          * until finishing nat/sit flush. inode->i_blocks can be updated.
1072          */
1073         down_write(&sbi->node_change);
1074
1075         if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
1076                 up_write(&sbi->node_change);
1077                 f2fs_unlock_all(sbi);
1078                 err = f2fs_sync_inode_meta(sbi);
1079                 if (err)
1080                         goto out;
1081                 cond_resched();
1082                 goto retry_flush_dents;
1083         }
1084
1085 retry_flush_nodes:
1086         down_write(&sbi->node_write);
1087
1088         if (get_pages(sbi, F2FS_DIRTY_NODES)) {
1089                 up_write(&sbi->node_write);
1090                 err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
1091                 if (err) {
1092                         up_write(&sbi->node_change);
1093                         f2fs_unlock_all(sbi);
1094                         goto out;
1095                 }
1096                 cond_resched();
1097                 goto retry_flush_nodes;
1098         }
1099
1100         /*
1101          * sbi->node_change is used only for AIO write_begin path which produces
1102          * dirty node blocks and some checkpoint values by block allocation.
1103          */
1104         __prepare_cp_block(sbi);
1105         up_write(&sbi->node_change);
1106 out:
1107         blk_finish_plug(&plug);
1108         return err;
1109 }
1110
1111 static void unblock_operations(struct f2fs_sb_info *sbi)
1112 {
1113         up_write(&sbi->node_write);
1114         f2fs_unlock_all(sbi);
1115 }
1116
1117 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
1118 {
1119         DEFINE_WAIT(wait);
1120
1121         for (;;) {
1122                 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
1123
1124                 if (!get_pages(sbi, F2FS_WB_CP_DATA))
1125                         break;
1126
1127                 io_schedule_timeout(5*HZ);
1128         }
1129         finish_wait(&sbi->cp_wait, &wait);
1130 }
1131
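/* Set or clear checkpoint flags according to the reason for this checkpoint. */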
1132 static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1133 {
1134         unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1135         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1136         unsigned long flags;
1137
1138         spin_lock_irqsave(&sbi->cp_lock, flags);
1139
1140         if ((cpc->reason & CP_UMOUNT) &&
1141                         le32_to_cpu(ckpt->cp_pack_total_block_count) >
1142                         sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
1143                 disable_nat_bits(sbi, false);
1144
1145         if (cpc->reason & CP_TRIMMED)
1146                 __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
1147         else
1148                 __clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
1149
1150         if (cpc->reason & CP_UMOUNT)
1151                 __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1152         else
1153                 __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
1154
1155         if (cpc->reason & CP_FASTBOOT)
1156                 __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1157         else
1158                 __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
1159
1160         if (orphan_num)
1161                 __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1162         else
1163                 __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
1164
1165         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1166                 __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
1167
1168         /* set this flag to activate crc|cp_ver for recovery */
1169         __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
1170
1171         spin_unlock_irqrestore(&sbi->cp_lock, flags);
1172 }
1173
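/*
 * Write one checkpoint pack to disk: the checkpoint blocks themselves,
 * orphan blocks, data/node summaries and, if enabled, the NAT bits.
 */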
1174 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1175 {
1176         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1177         struct f2fs_nm_info *nm_i = NM_I(sbi);
1178         unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
1179         block_t start_blk;
1180         unsigned int data_sum_blocks, orphan_blocks;
1181         __u32 crc32 = 0;
1182         int i;
1183         int cp_payload_blks = __cp_payload(sbi);
1184         struct super_block *sb = sbi->sb;
1185         struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
1186         u64 kbytes_written;
1187
1188         /* Flush all the NAT/SIT pages */
1189         while (get_pages(sbi, F2FS_DIRTY_META)) {
1190                 sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
1191                 if (unlikely(f2fs_cp_error(sbi)))
1192                         return -EIO;
1193         }
1194
1195         /*
1196          * modify checkpoint
1197          * version number is already updated
1198          */
1199         ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
1200         ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
1201         for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
1202                 ckpt->cur_node_segno[i] =
1203                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
1204                 ckpt->cur_node_blkoff[i] =
1205                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
1206                 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
1207                                 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
1208         }
1209         for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
1210                 ckpt->cur_data_segno[i] =
1211                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
1212                 ckpt->cur_data_blkoff[i] =
1213                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
1214                 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
1215                                 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
1216         }
1217
1218         /* 2 cp  + n data seg summary + orphan inode blocks */
1219         data_sum_blocks = npages_for_summary_flush(sbi, false);
1220         spin_lock_irqsave(&sbi->cp_lock, flags);
1221         if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1222                 __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1223         else
1224                 __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
1225         spin_unlock_irqrestore(&sbi->cp_lock, flags);
1226
1227         orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
1228         ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
1229                         orphan_blocks);
1230
1231         if (__remain_node_summaries(cpc->reason))
1232                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
1233                                 cp_payload_blks + data_sum_blocks +
1234                                 orphan_blocks + NR_CURSEG_NODE_TYPE);
1235         else
1236                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
1237                                 cp_payload_blks + data_sum_blocks +
1238                                 orphan_blocks);
1239
1240         /* update ckpt flag for checkpoint */
1241         update_ckpt_flags(sbi, cpc);
1242
1243         /* update SIT/NAT bitmap */
1244         get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
1245         get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
1246
1247         crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
1248         *((__le32 *)((unsigned char *)ckpt +
1249                                 le32_to_cpu(ckpt->checksum_offset)))
1250                                 = cpu_to_le32(crc32);
1251
1252         start_blk = __start_cp_next_addr(sbi);
1253
1254         /* write nat bits */
1255         if (enabled_nat_bits(sbi, cpc)) {
1256                 __u64 cp_ver = cur_cp_version(ckpt);
1257                 block_t blk;
1258
1259                 cp_ver |= ((__u64)crc32 << 32);
1260                 *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
1261
1262                 blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
1263                 for (i = 0; i < nm_i->nat_bits_blocks; i++)
1264                         update_meta_page(sbi, nm_i->nat_bits +
1265                                         (i << F2FS_BLKSIZE_BITS), blk + i);
1266
1267                 /* Flush all the NAT BITS pages */
1268                 while (get_pages(sbi, F2FS_DIRTY_META)) {
1269                         sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
1270                         if (unlikely(f2fs_cp_error(sbi)))
1271                                 return -EIO;
1272                 }
1273         }
1274
1275         /* need to wait for end_io results */
1276         wait_on_all_pages_writeback(sbi);
1277         if (unlikely(f2fs_cp_error(sbi)))
1278                 return -EIO;
1279
1280         /* write out checkpoint buffer at block 0 */
1281         update_meta_page(sbi, ckpt, start_blk++);
1282
1283         for (i = 1; i < 1 + cp_payload_blks; i++)
1284                 update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
1285                                                         start_blk++);
1286
1287         if (orphan_num) {
1288                 write_orphan_inodes(sbi, start_blk);
1289                 start_blk += orphan_blocks;
1290         }
1291
1292         write_data_summaries(sbi, start_blk);
1293         start_blk += data_sum_blocks;
1294
1295         /* Record write statistics in the hot node summary */
1296         kbytes_written = sbi->kbytes_written;
1297         if (sb->s_bdev->bd_part)
1298                 kbytes_written += BD_PART_WRITTEN(sbi);
1299
1300         seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1301
1302         if (__remain_node_summaries(cpc->reason)) {
1303                 write_node_summaries(sbi, start_blk);
1304                 start_blk += NR_CURSEG_NODE_TYPE;
1305         }
1306
1307         /* writeout checkpoint block */
1308         update_meta_page(sbi, ckpt, start_blk);
1309
1310         /* wait for previously submitted node/meta pages writeback */
1311         wait_on_all_pages_writeback(sbi);
1312
1313         if (unlikely(f2fs_cp_error(sbi)))
1314                 return -EIO;
1315
1316         filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
1317         filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
1318
1319         /* update user_block_counts */
1320         sbi->last_valid_block_count = sbi->total_valid_block_count;
1321         percpu_counter_set(&sbi->alloc_valid_block_count, 0);
1322
1323         /* Here, we have only one bio containing the CP pack */
1324         sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
1325
1326         /* wait for previously submitted meta pages writeback */
1327         wait_on_all_pages_writeback(sbi);
1328
1329         release_ino_entry(sbi, false);
1330
1331         if (unlikely(f2fs_cp_error(sbi)))
1332                 return -EIO;
1333
1334         clear_sbi_flag(sbi, SBI_IS_DIRTY);
1335         clear_sbi_flag(sbi, SBI_NEED_CP);
1336         __set_cp_next_pack(sbi);
1337
1338         /*
1339          * redirty the superblock if metadata such as node pages or the
1340          * inode cache was updated while writing the checkpoint.
1341          */
1342         if (get_pages(sbi, F2FS_DIRTY_NODES) ||
1343                         get_pages(sbi, F2FS_DIRTY_IMETA))
1344                 set_sbi_flag(sbi, SBI_IS_DIRTY);
1345
1346         f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
1347
1348         return 0;
1349 }
1350
1351 /*
1352  * We guarantee that this checkpoint procedure will not fail.
1353  */
1354 int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1355 {
1356         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1357         unsigned long long ckpt_ver;
1358         int err = 0;
1359
1360         mutex_lock(&sbi->cp_mutex);
1361
1362         if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1363                 ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
1364                 ((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
1365                 goto out;
1366         if (unlikely(f2fs_cp_error(sbi))) {
1367                 err = -EIO;
1368                 goto out;
1369         }
1370         if (f2fs_readonly(sbi->sb)) {
1371                 err = -EROFS;
1372                 goto out;
1373         }
1374
1375         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1376
1377         err = block_operations(sbi);
1378         if (err)
1379                 goto out;
1380
1381         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1382
1383         f2fs_flush_merged_writes(sbi);
1384
1385         /* this is the case of multiple fstrims without any changes */
1386         if (cpc->reason & CP_DISCARD) {
1387                 if (!exist_trim_candidates(sbi, cpc)) {
1388                         unblock_operations(sbi);
1389                         goto out;
1390                 }
1391
1392                 if (NM_I(sbi)->dirty_nat_cnt == 0 &&
1393                                 SIT_I(sbi)->dirty_sentries == 0 &&
1394                                 prefree_segments(sbi) == 0) {
1395                         flush_sit_entries(sbi, cpc);
1396                         clear_prefree_segments(sbi, cpc);
1397                         unblock_operations(sbi);
1398                         goto out;
1399                 }
1400         }
1401
1402         /*
1403          * update checkpoint pack index
1404          * Increase the version number so that SIT entries and seg
1405          * summaries are written at the correct place
1406          */
1407         ckpt_ver = cur_cp_version(ckpt);
1408         ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1409
1410         /* write cached NAT/SIT entries to NAT/SIT area */
1411         flush_nat_entries(sbi, cpc);
1412         flush_sit_entries(sbi, cpc);
1413
1414         /* unlock all the fs_lock[] in do_checkpoint() */
1415         err = do_checkpoint(sbi, cpc);
1416         if (err)
1417                 release_discard_addrs(sbi);
1418         else
1419                 clear_prefree_segments(sbi, cpc);
1420
1421         unblock_operations(sbi);
1422         stat_inc_cp_count(sbi->stat_info);
1423
1424         if (cpc->reason & CP_RECOVERY)
1425                 f2fs_msg(sbi->sb, KERN_NOTICE,
1426                         "checkpoint: version = %llx", ckpt_ver);
1427
1428         /* do checkpoint periodically */
1429         f2fs_update_time(sbi, CP_TIME);
1430         trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
1431 out:
1432         mutex_unlock(&sbi->cp_mutex);
1433         return err;
1434 }
1435
1436 void init_ino_entry_info(struct f2fs_sb_info *sbi)
1437 {
1438         int i;
1439
1440         for (i = 0; i < MAX_INO_ENTRY; i++) {
1441                 struct inode_management *im = &sbi->im[i];
1442
1443                 INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
1444                 spin_lock_init(&im->ino_lock);
1445                 INIT_LIST_HEAD(&im->ino_list);
1446                 im->ino_num = 0;
1447         }
1448
1449         sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1450                         NR_CURSEG_TYPE - __cp_payload(sbi)) *
1451                                 F2FS_ORPHANS_PER_BLOCK;
1452 }
1453
1454 int __init create_checkpoint_caches(void)
1455 {
1456         ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1457                         sizeof(struct ino_entry));
1458         if (!ino_entry_slab)
1459                 return -ENOMEM;
1460         inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
1461                         sizeof(struct inode_entry));
1462         if (!inode_entry_slab) {
1463                 kmem_cache_destroy(ino_entry_slab);
1464                 return -ENOMEM;
1465         }
1466         return 0;
1467 }
1468
1469 void destroy_checkpoint_caches(void)
1470 {
1471         kmem_cache_destroy(ino_entry_slab);
1472         kmem_cache_destroy(inode_entry_slab);
1473 }