/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
        unsigned long tmp = 0;
        int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
        shift = 56;
#endif
        while (shift >= 0) {
                tmp |= (unsigned long)str[idx++] << shift;
                shift -= BITS_PER_BYTE;
        }
        return tmp;
}

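/*
 * For illustration only (a sketch, not called anywhere): the bytes are
 * loaded MSB-first, so on a 32-bit build
 *
 *   unsigned char str[4] = { 0x12, 0x34, 0x56, 0x78 };
 *   __reverse_ulong(str) == 0x12345678UL
 *
 * which lets the MSB-first bit scans below run over f2fs bitmaps.
 */
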
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
        int num = 0;

#if BITS_PER_LONG == 64
        if ((word & 0xffffffff00000000UL) == 0)
                num += 32;
        else
                word >>= 32;
#endif
        if ((word & 0xffff0000) == 0)
                num += 16;
        else
                word >>= 16;

        if ((word & 0xff00) == 0)
                num += 8;
        else
                word >>= 8;

        if ((word & 0xf0) == 0)
                num += 4;
        else
                word >>= 4;

        if ((word & 0xc) == 0)
                num += 2;
        else
                word >>= 2;

        if ((word & 0x2) == 0)
                num += 1;
        return num;
}

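/*
 * Sanity sketch (derived from the binary search above, not kernel code):
 * the scan counts from the MSB down, so
 *
 *   __reverse_ffs(1UL << (BITS_PER_LONG - 1)) == 0   // f2fs bit 0
 *   __reverse_ffs(1UL) == BITS_PER_LONG - 1          // last f2fs bit
 */
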
/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of BITS_PER_LONG.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = size;
        unsigned long tmp;

        if (offset >= size)
                return size;

        size -= (offset & ~(BITS_PER_LONG - 1));
        offset %= BITS_PER_LONG;

        while (1) {
                if (*p == 0)
                        goto pass;

                tmp = __reverse_ulong((unsigned char *)p);

                tmp &= ~0UL >> offset;
                if (size < BITS_PER_LONG)
                        tmp &= (~0UL << (BITS_PER_LONG - size));
                if (tmp)
                        goto found;
pass:
                if (size <= BITS_PER_LONG)
                        break;
                size -= BITS_PER_LONG;
                offset = 0;
                p++;
        }
        return result;
found:
        return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
                        unsigned long size, unsigned long offset)
{
        const unsigned long *p = addr + BIT_WORD(offset);
        unsigned long result = size;
        unsigned long tmp;

        if (offset >= size)
                return size;

        size -= (offset & ~(BITS_PER_LONG - 1));
        offset %= BITS_PER_LONG;

        while (1) {
                if (*p == ~0UL)
                        goto pass;

                tmp = __reverse_ulong((unsigned char *)p);

                if (offset)
                        tmp |= ~0UL << (BITS_PER_LONG - offset);
                if (size < BITS_PER_LONG)
                        tmp |= ~0UL >> size;
                if (tmp != ~0UL)
                        goto found;
pass:
                if (size <= BITS_PER_LONG)
                        break;
                size -= BITS_PER_LONG;
                offset = 0;
                p++;
        }
        return result;
found:
        return result - size + __reverse_ffz(tmp);
}

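/*
 * Usage sketch (mirrors find_next_bit(), but in f2fs bit order): after
 * f2fs_set_bit(5, bitmap), assuming no other bits are set,
 *
 *   __find_rev_next_bit(bitmap, size, 0)      == 5
 *   __find_rev_next_zero_bit(bitmap, size, 5) == 6
 */
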
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
        int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
        int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
        int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

        if (test_opt(sbi, LFS))
                return false;
        if (sbi->gc_mode == GC_URGENT)
                return true;

        return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
                        SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

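/*
 * Worked example with hypothetical numbers: node_secs = 2, dent_secs = 1,
 * imeta_secs = 0, min_ssr_sections = 2 and reserved_sections() = 10 give
 * a threshold of 2 + 2*1 + 0 + 2 + 10 = 16, so SSR kicks in once
 * free_sections(sbi) drops to 16 or below.
 */
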
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *new;

        f2fs_trace_pid(page);

        set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
        SetPagePrivate(page);

        new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

        /* add atomic page indices to the list */
        new->page = page;
        INIT_LIST_HEAD(&new->list);

        /* increase reference count with clean state */
        mutex_lock(&fi->inmem_lock);
        get_page(page);
        list_add_tail(&new->list, &fi->inmem_pages);
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (list_empty(&fi->inmem_ilist))
                list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        mutex_unlock(&fi->inmem_lock);

        trace_f2fs_register_inmem_page(page, INMEM);
}

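/*
 * The pages registered above back f2fs's atomic write ioctls; a userspace
 * sketch (fd is assumed to be a file descriptor on an f2fs file):
 *
 *   ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
 *   write(fd, buf, len);                      // staged as in-memory pages
 *   ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);  // f2fs_commit_inmem_pages()
 */
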
static int __revoke_inmem_pages(struct inode *inode,
                                struct list_head *head, bool drop, bool recover,
                                bool trylock)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct inmem_pages *cur, *tmp;
        int err = 0;

        list_for_each_entry_safe(cur, tmp, head, list) {
                struct page *page = cur->page;

                if (drop)
                        trace_f2fs_commit_inmem_page(page, INMEM_DROP);

                if (trylock) {
                        /*
                         * to avoid deadlock in between page lock and
                         * inmem_lock.
                         */
                        if (!trylock_page(page))
                                continue;
                } else {
                        lock_page(page);
                }

                f2fs_wait_on_page_writeback(page, DATA, true);

                if (recover) {
                        struct dnode_of_data dn;
                        struct node_info ni;

                        trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
                        set_new_dnode(&dn, inode, NULL, NULL, 0);
                        err = f2fs_get_dnode_of_data(&dn, page->index,
                                                                LOOKUP_NODE);
                        if (err) {
                                if (err == -ENOMEM) {
                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                                        cond_resched();
                                        goto retry;
                                }
                                err = -EAGAIN;
                                goto next;
                        }

                        err = f2fs_get_node_info(sbi, dn.nid, &ni);
                        if (err) {
                                f2fs_put_dnode(&dn);
                                return err;
                        }

                        if (cur->old_addr == NEW_ADDR) {
                                f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
                                f2fs_update_data_blkaddr(&dn, NEW_ADDR);
                        } else
                                f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
                                        cur->old_addr, ni.version, true, true);
                        f2fs_put_dnode(&dn);
                }
next:
                /* we don't need to invalidate this in the successful status */
                if (drop || recover) {
                        ClearPageUptodate(page);
                        clear_cold_data(page);
                }
                set_page_private(page, 0);
                ClearPagePrivate(page);
                f2fs_put_page(page, 1);

                list_del(&cur->list);
                kmem_cache_free(inmem_entry_slab, cur);
                dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
        }
        return err;
}

void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
        struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
        struct inode *inode;
        struct f2fs_inode_info *fi;
next:
        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (list_empty(head)) {
                spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
                return;
        }
        fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
        inode = igrab(&fi->vfs_inode);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

        if (inode) {
                if (gc_failure) {
                        if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
                                goto drop;
                        goto skip;
                }
drop:
                set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
                f2fs_drop_inmem_pages(inode);
                iput(inode);
        }
skip:
        congestion_wait(BLK_RW_ASYNC, HZ/50);
        cond_resched();
        goto next;
}

void f2fs_drop_inmem_pages(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);

        while (!list_empty(&fi->inmem_pages)) {
                mutex_lock(&fi->inmem_lock);
                __revoke_inmem_pages(inode, &fi->inmem_pages,
                                                true, false, true);

                if (list_empty(&fi->inmem_pages)) {
                        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
                        if (!list_empty(&fi->inmem_ilist))
                                list_del_init(&fi->inmem_ilist);
                        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
                }
                mutex_unlock(&fi->inmem_lock);
        }

        clear_inode_flag(inode, FI_ATOMIC_FILE);
        fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
        stat_dec_atomic_write(inode);
}

void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct list_head *head = &fi->inmem_pages;
        struct inmem_pages *cur = NULL;

        f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

        mutex_lock(&fi->inmem_lock);
        list_for_each_entry(cur, head, list) {
                if (cur->page == page)
                        break;
        }

        f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
        list_del(&cur->list);
        mutex_unlock(&fi->inmem_lock);

        dec_page_count(sbi, F2FS_INMEM_PAGES);
        kmem_cache_free(inmem_entry_slab, cur);

        ClearPageUptodate(page);
        set_page_private(page, 0);
        ClearPagePrivate(page);
        f2fs_put_page(page, 0);

        trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

static int __f2fs_commit_inmem_pages(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        struct inmem_pages *cur, *tmp;
        struct f2fs_io_info fio = {
                .sbi = sbi,
                .ino = inode->i_ino,
                .type = DATA,
                .op = REQ_OP_WRITE,
                .op_flags = REQ_SYNC | REQ_PRIO,
                .io_type = FS_DATA_IO,
        };
        struct list_head revoke_list;
        pgoff_t last_idx = ULONG_MAX;
        int err = 0;

        INIT_LIST_HEAD(&revoke_list);

        list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
                struct page *page = cur->page;

                lock_page(page);
                if (page->mapping == inode->i_mapping) {
                        trace_f2fs_commit_inmem_page(page, INMEM);

                        set_page_dirty(page);
                        f2fs_wait_on_page_writeback(page, DATA, true);
                        if (clear_page_dirty_for_io(page)) {
                                inode_dec_dirty_pages(inode);
                                f2fs_remove_dirty_inode(inode);
                        }
retry:
                        fio.page = page;
                        fio.old_blkaddr = NULL_ADDR;
                        fio.encrypted_page = NULL;
                        fio.need_lock = LOCK_DONE;
                        err = f2fs_do_write_data_page(&fio);
                        if (err) {
                                if (err == -ENOMEM) {
                                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                                        cond_resched();
                                        goto retry;
                                }
                                unlock_page(page);
                                break;
                        }
                        /* record old blkaddr for revoking */
                        cur->old_addr = fio.old_blkaddr;
                        last_idx = page->index;
                }
                unlock_page(page);
                list_move_tail(&cur->list, &revoke_list);
        }

        if (last_idx != ULONG_MAX)
                f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);

        if (err) {
                /*
                 * Try to revoke all committed pages. This can still fail
                 * due to lack of memory or some other reason; if that
                 * happens, -EAGAIN is returned, meaning the transaction
                 * has lost its integrity and the caller should use a
                 * journal to recover, or rewrite and commit the last
                 * transaction. For any other error number, the revoking
                 * was completed by the filesystem itself.
                 */
                err = __revoke_inmem_pages(inode, &revoke_list,
                                                false, true, false);

                /* drop all uncommitted pages */
                __revoke_inmem_pages(inode, &fi->inmem_pages,
                                                true, false, false);
        } else {
                __revoke_inmem_pages(inode, &revoke_list,
                                                false, false, false);
        }

        return err;
}

int f2fs_commit_inmem_pages(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        int err;

        f2fs_balance_fs(sbi, true);

        down_write(&fi->i_gc_rwsem[WRITE]);

        f2fs_lock_op(sbi);
        set_inode_flag(inode, FI_ATOMIC_COMMIT);

        mutex_lock(&fi->inmem_lock);
        err = __f2fs_commit_inmem_pages(inode);

        spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
        if (!list_empty(&fi->inmem_ilist))
                list_del_init(&fi->inmem_ilist);
        spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
        mutex_unlock(&fi->inmem_lock);

        clear_inode_flag(inode, FI_ATOMIC_COMMIT);

        f2fs_unlock_op(sbi);
        up_write(&fi->i_gc_rwsem[WRITE]);

        return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
        if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
                f2fs_show_injection_info(FAULT_CHECKPOINT);
                f2fs_stop_checkpoint(sbi, false);
        }

        /* balance_fs_bg() is allowed to be left pending */
        if (need && excess_cached_nats(sbi))
                f2fs_balance_fs_bg(sbi);

        /*
         * We should do GC, or end up with a checkpoint, if there are too
         * many dirty dir/node pages and not enough free segments.
         */
        if (has_not_enough_free_secs(sbi, 0, 0)) {
                mutex_lock(&sbi->gc_mutex);
                f2fs_gc(sbi, false, false, NULL_SEGNO);
        }
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
                return;

        /* try to shrink the extent cache when there is not enough memory */
        if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
                f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

        /* check the # of cached NAT entries */
        if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
                f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

        if (!f2fs_available_free_memory(sbi, FREE_NIDS))
                f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
        else
                f2fs_build_free_nids(sbi, false, false);

        if (!is_idle(sbi) &&
                (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
                return;

        /* checkpoint is the only way to shrink partial cached entries */
        if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
                        !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
                        excess_prefree_segs(sbi) ||
                        excess_dirty_nats(sbi) ||
                        excess_dirty_nodes(sbi) ||
                        f2fs_time_over(sbi, CP_TIME)) {
                if (test_opt(sbi, DATA_FLUSH)) {
                        struct blk_plug plug;

                        blk_start_plug(&plug);
                        f2fs_sync_dirty_inodes(sbi, FILE_INODE);
                        blk_finish_plug(&plug);
                }
                f2fs_sync_fs(sbi->sb, true);
                stat_inc_bg_cp_count(sbi->stat_info);
        }
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
                                struct block_device *bdev)
{
        struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
        int ret;

        bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
        bio_set_dev(bio, bdev);
        ret = submit_bio_wait(bio);
        bio_put(bio);

        trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
                                test_opt(sbi, FLUSH_MERGE), ret);
        return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
        int ret = 0;
        int i;

        if (!f2fs_is_multi_device(sbi))
                return __submit_flush_wait(sbi, sbi->sb->s_bdev);

        for (i = 0; i < sbi->s_ndevs; i++) {
                if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
                        continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;
        }
        return ret;
}

static int issue_flush_thread(void *data)
{
        struct f2fs_sb_info *sbi = data;
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
        if (kthread_should_stop())
                return 0;

        sb_start_intwrite(sbi->sb);

        if (!llist_empty(&fcc->issue_list)) {
                struct flush_cmd *cmd, *next;
                int ret;

                fcc->dispatch_list = llist_del_all(&fcc->issue_list);
                fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

                cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

                ret = submit_flush_wait(sbi, cmd->ino);
                atomic_inc(&fcc->issued_flush);

                llist_for_each_entry_safe(cmd, next,
                                          fcc->dispatch_list, llnode) {
                        cmd->ret = ret;
                        complete(&cmd->wait);
                }
                fcc->dispatch_list = NULL;
        }

        sb_end_intwrite(sbi->sb);

        wait_event_interruptible(*q,
                kthread_should_stop() || !llist_empty(&fcc->issue_list));
        goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
        struct flush_cmd cmd;
        int ret;

        if (test_opt(sbi, NOBARRIER))
                return 0;

        if (!test_opt(sbi, FLUSH_MERGE)) {
                atomic_inc(&fcc->issing_flush);
                ret = submit_flush_wait(sbi, ino);
                atomic_dec(&fcc->issing_flush);
                atomic_inc(&fcc->issued_flush);
                return ret;
        }

        if (atomic_inc_return(&fcc->issing_flush) == 1 ||
            f2fs_is_multi_device(sbi)) {
                ret = submit_flush_wait(sbi, ino);
                atomic_dec(&fcc->issing_flush);

                atomic_inc(&fcc->issued_flush);
                return ret;
        }

        cmd.ino = ino;
        init_completion(&cmd.wait);

        llist_add(&cmd.llnode, &fcc->issue_list);

        /* update issue_list before we wake up issue_flush thread */
        smp_mb();

        if (waitqueue_active(&fcc->flush_wait_queue))
                wake_up(&fcc->flush_wait_queue);

        if (fcc->f2fs_issue_flush) {
                wait_for_completion(&cmd.wait);
                atomic_dec(&fcc->issing_flush);
        } else {
                struct llist_node *list;

                list = llist_del_all(&fcc->issue_list);
                if (!list) {
                        wait_for_completion(&cmd.wait);
                        atomic_dec(&fcc->issing_flush);
                } else {
                        struct flush_cmd *tmp, *next;

                        ret = submit_flush_wait(sbi, ino);

                        llist_for_each_entry_safe(tmp, next, list, llnode) {
                                if (tmp == &cmd) {
                                        cmd.ret = ret;
                                        atomic_dec(&fcc->issing_flush);
                                        continue;
                                }
                                tmp->ret = ret;
                                complete(&tmp->wait);
                        }
                }
        }

        return cmd.ret;
}

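/*
 * The FLUSH_MERGE path above in a nutshell: concurrent callers enqueue a
 * flush_cmd on fcc->issue_list and sleep on cmd.wait, while
 * issue_flush_thread() drains the whole list behind one PREFLUSH bio and
 * completes every waiter with the shared result, so a single device flush
 * can serve many fsyncs.
 */
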
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
        dev_t dev = sbi->sb->s_bdev->bd_dev;
        struct flush_cmd_control *fcc;
        int err = 0;

        if (SM_I(sbi)->fcc_info) {
                fcc = SM_I(sbi)->fcc_info;
                if (fcc->f2fs_issue_flush)
                        return err;
                goto init_thread;
        }

        fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
        if (!fcc)
                return -ENOMEM;
        atomic_set(&fcc->issued_flush, 0);
        atomic_set(&fcc->issing_flush, 0);
        init_waitqueue_head(&fcc->flush_wait_queue);
        init_llist_head(&fcc->issue_list);
        SM_I(sbi)->fcc_info = fcc;
        if (!test_opt(sbi, FLUSH_MERGE))
                return err;

init_thread:
        fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
                                "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
        if (IS_ERR(fcc->f2fs_issue_flush)) {
                err = PTR_ERR(fcc->f2fs_issue_flush);
                kfree(fcc);
                SM_I(sbi)->fcc_info = NULL;
                return err;
        }

        return err;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
        struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

        if (fcc && fcc->f2fs_issue_flush) {
                struct task_struct *flush_thread = fcc->f2fs_issue_flush;

                fcc->f2fs_issue_flush = NULL;
                kthread_stop(flush_thread);
        }
        if (free) {
                kfree(fcc);
                SM_I(sbi)->fcc_info = NULL;
        }
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
        int ret = 0, i;

        if (!f2fs_is_multi_device(sbi))
                return 0;

        for (i = 1; i < sbi->s_ndevs; i++) {
                if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
                        continue;
                ret = __submit_flush_wait(sbi, FDEV(i).bdev);
                if (ret)
                        break;

                spin_lock(&sbi->dev_lock);
                f2fs_clear_bit(i, (char *)&sbi->dirty_device);
                spin_unlock(&sbi->dev_lock);
        }

        return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        /* need not be added */
        if (IS_CURSEG(sbi, segno))
                return;

        if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]++;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (unlikely(t >= DIRTY)) {
                        f2fs_bug_on(sbi, 1);
                        return;
                }
                if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]++;
        }
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
                enum dirty_type dirty_type)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

        if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
                dirty_i->nr_dirty[dirty_type]--;

        if (dirty_type == DIRTY) {
                struct seg_entry *sentry = get_seg_entry(sbi, segno);
                enum dirty_type t = sentry->type;

                if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
                        dirty_i->nr_dirty[t]--;

                if (get_valid_blocks(sbi, segno, true) == 0)
                        clear_bit(GET_SEC_FROM_SEG(sbi, segno),
                                                dirty_i->victim_secmap);
        }
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        unsigned short valid_blocks;

        if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
                return;

        mutex_lock(&dirty_i->seglist_lock);

        valid_blocks = get_valid_blocks(sbi, segno, false);

        if (valid_blocks == 0) {
                __locate_dirty_segment(sbi, segno, PRE);
                __remove_dirty_segment(sbi, segno, DIRTY);
        } else if (valid_blocks < sbi->blocks_per_seg) {
                __locate_dirty_segment(sbi, segno, DIRTY);
        } else {
                /* Recovery routine with SSR needs this */
                __remove_dirty_segment(sbi, segno, DIRTY);
        }

        mutex_unlock(&dirty_i->seglist_lock);
}

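/*
 * Recap of the classification above: valid_blocks == 0 marks the segment
 * PRE (prefree, reclaimable at the next checkpoint), a partially valid
 * segment is DIRTY, and a fully valid one is removed from DIRTY.
 */
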
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t lstart,
                block_t start, block_t len)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc;

        f2fs_bug_on(sbi, !len);

        pend_list = &dcc->pend_list[plist_idx(len)];

        dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
        INIT_LIST_HEAD(&dc->list);
        dc->bdev = bdev;
        dc->lstart = lstart;
        dc->start = start;
        dc->len = len;
        dc->ref = 0;
        dc->state = D_PREP;
        dc->issuing = 0;
        dc->error = 0;
        init_completion(&dc->wait);
        list_add_tail(&dc->list, pend_list);
        spin_lock_init(&dc->lock);
        dc->bio_ref = 0;
        atomic_inc(&dcc->discard_cmd_cnt);
        dcc->undiscard_blks += len;

        return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len,
                                struct rb_node *parent, struct rb_node **p)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *dc;

        dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

        rb_link_node(&dc->rb_node, parent, p);
        rb_insert_color(&dc->rb_node, &dcc->root);

        return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
                                                        struct discard_cmd *dc)
{
        if (dc->state == D_DONE)
                atomic_sub(dc->issuing, &dcc->issing_discard);

        list_del(&dc->list);
        rb_erase(&dc->rb_node, &dcc->root);
        dcc->undiscard_blks -= dc->len;

        kmem_cache_free(discard_cmd_slab, dc);

        atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        unsigned long flags;

        trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

        spin_lock_irqsave(&dc->lock, flags);
        if (dc->bio_ref) {
                spin_unlock_irqrestore(&dc->lock, flags);
                return;
        }
        spin_unlock_irqrestore(&dc->lock, flags);

        f2fs_bug_on(sbi, dc->ref);

        if (dc->error == -EOPNOTSUPP)
                dc->error = 0;

        if (dc->error)
                f2fs_msg(sbi->sb, KERN_INFO,
                        "Issue discard(%u, %u, %u) failed, ret: %d",
                        dc->lstart, dc->start, dc->len, dc->error);
        __detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
        struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
        unsigned long flags;

        dc->error = blk_status_to_errno(bio->bi_status);

        spin_lock_irqsave(&dc->lock, flags);
        dc->bio_ref--;
        if (!dc->bio_ref && dc->state == D_SUBMIT) {
                dc->state = D_DONE;
                complete_all(&dc->wait);
        }
        spin_unlock_irqrestore(&dc->lock, flags);
        bio_put(bio);
}

static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
                                block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
        struct seg_entry *sentry;
        unsigned int segno;
        block_t blk = start;
        unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
        unsigned long *map;

        while (blk < end) {
                segno = GET_SEGNO(sbi, blk);
                sentry = get_seg_entry(sbi, segno);
                offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

                if (end < START_BLOCK(sbi, segno + 1))
                        size = GET_BLKOFF_FROM_SEG0(sbi, end);
                else
                        size = max_blocks;
                map = (unsigned long *)(sentry->cur_valid_map);
                offset = __find_rev_next_bit(map, size, offset);
                f2fs_bug_on(sbi, offset != size);
                blk = START_BLOCK(sbi, segno + 1);
        }
#endif
}

static void __init_discard_policy(struct f2fs_sb_info *sbi,
                                struct discard_policy *dpolicy,
                                int discard_type, unsigned int granularity)
{
        /* common policy */
        dpolicy->type = discard_type;
        dpolicy->sync = true;
        dpolicy->ordered = false;
        dpolicy->granularity = granularity;

        dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
        dpolicy->io_aware_gran = MAX_PLIST_NUM;

        if (discard_type == DPOLICY_BG) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
                dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
                dpolicy->io_aware = true;
                dpolicy->sync = false;
                dpolicy->ordered = true;
                if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
                        dpolicy->granularity = 1;
                        dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
                }
        } else if (discard_type == DPOLICY_FORCE) {
                dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
                dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
                dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
                dpolicy->io_aware = false;
        } else if (discard_type == DPOLICY_FSTRIM) {
                dpolicy->io_aware = false;
        } else if (discard_type == DPOLICY_UMOUNT) {
                dpolicy->max_requests = UINT_MAX;
                dpolicy->io_aware = false;
        }
}

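/*
 * Cheat-sheet for the policies initialized above:
 *   DPOLICY_BG:     async, ordered, io-aware; granularity drops to 1 and
 *                   the issue interval tightens once utilization passes
 *                   DEF_DISCARD_URGENT_UTIL
 *   DPOLICY_FORCE:  sync, not io-aware
 *   DPOLICY_FSTRIM: sync, not io-aware
 *   DPOLICY_UMOUNT: sync, not io-aware, max_requests = UINT_MAX
 */
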
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
                                                struct discard_policy *dpolicy,
                                                struct discard_cmd *dc,
                                                unsigned int *issued)
{
        struct block_device *bdev = dc->bdev;
        struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
                        SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
                                        &(dcc->fstrim_list) : &(dcc->wait_list);
        int flag = dpolicy->sync ? REQ_SYNC : 0;
        block_t lstart, start, len, total_len;
        int err = 0;

        if (dc->state != D_PREP)
                return 0;

        if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
                return 0;

        trace_f2fs_issue_discard(bdev, dc->start, dc->len);

        lstart = dc->lstart;
        start = dc->start;
        len = dc->len;
        total_len = len;

        dc->len = 0;

        while (total_len && *issued < dpolicy->max_requests && !err) {
                struct bio *bio = NULL;
                unsigned long flags;
                bool last = true;

                if (len > max_discard_blocks) {
                        len = max_discard_blocks;
                        last = false;
                }

                (*issued)++;
                if (*issued == dpolicy->max_requests)
                        last = true;

                dc->len += len;

                if (time_to_inject(sbi, FAULT_DISCARD)) {
                        f2fs_show_injection_info(FAULT_DISCARD);
                        err = -EIO;
                        goto submit;
                }
                err = __blkdev_issue_discard(bdev,
                                        SECTOR_FROM_BLOCK(start),
                                        SECTOR_FROM_BLOCK(len),
                                        GFP_NOFS, 0, &bio);
submit:
                if (err) {
                        spin_lock_irqsave(&dc->lock, flags);
                        if (dc->state == D_PARTIAL)
                                dc->state = D_SUBMIT;
                        spin_unlock_irqrestore(&dc->lock, flags);

                        break;
                }

                f2fs_bug_on(sbi, !bio);

                /*
                 * This must come before submission so the command
                 * cannot reach D_DONE right away.
                 */
                spin_lock_irqsave(&dc->lock, flags);
                if (last)
                        dc->state = D_SUBMIT;
                else
                        dc->state = D_PARTIAL;
                dc->bio_ref++;
                spin_unlock_irqrestore(&dc->lock, flags);

                atomic_inc(&dcc->issing_discard);
                dc->issuing++;
                list_move_tail(&dc->list, wait_list);

                /* sanity check on discard range */
                __check_sit_bitmap(sbi, lstart, lstart + len);

                bio->bi_private = dc;
                bio->bi_end_io = f2fs_submit_discard_endio;
                bio->bi_opf |= flag;
                submit_bio(bio);

                atomic_inc(&dcc->issued_discard);

                f2fs_update_iostat(sbi, FS_DISCARD, 1);

                lstart += len;
                start += len;
                total_len -= len;
                len = total_len;
        }

        if (!err && len)
                __update_discard_tree_range(sbi, bdev, lstart, start, len);
        return err;
}

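/*
 * Splitting sketch with hypothetical numbers: if max_discard_blocks is 8,
 * a 20-block command is issued as 8 + 8 + 4; the first two bios leave the
 * command in D_PARTIAL, the last one moves it to D_SUBMIT, and
 * f2fs_submit_discard_endio() flips it to D_DONE once bio_ref drains.
 */
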
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len,
                                struct rb_node **insert_p,
                                struct rb_node *insert_parent)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct discard_cmd *dc = NULL;

        if (insert_p && insert_parent) {
                parent = insert_parent;
                p = insert_p;
                goto do_insert;
        }

        p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
do_insert:
        dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
        if (!dc)
                return NULL;

        return dc;
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
                                                struct discard_cmd *dc)
{
        list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
                                struct discard_cmd *dc, block_t blkaddr)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_info di = dc->di;
        bool modified = false;

        if (dc->state == D_DONE || dc->len == 1) {
                __remove_discard_cmd(sbi, dc);
                return;
        }

        dcc->undiscard_blks -= di.len;

        if (blkaddr > di.lstart) {
                dc->len = blkaddr - dc->lstart;
                dcc->undiscard_blks += dc->len;
                __relocate_discard_cmd(dcc, dc);
                modified = true;
        }

        if (blkaddr < di.lstart + di.len - 1) {
                if (modified) {
                        __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
                                        di.start + blkaddr + 1 - di.lstart,
                                        di.lstart + di.len - 1 - blkaddr,
                                        NULL, NULL);
                } else {
                        dc->lstart++;
                        dc->len--;
                        dc->start++;
                        dcc->undiscard_blks += dc->len;
                        __relocate_discard_cmd(dcc, dc);
                }
        }
}

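/*
 * Punch example: a D_PREP command covering lstart 100 with len 10,
 * punched at blkaddr 104, is trimmed to [100, len 4) and a new command
 * for [105, len 5) is inserted, so only block 104 leaves the range.
 */
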
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
                                struct block_device *bdev, block_t lstart,
                                block_t start, block_t len)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
        struct discard_cmd *dc;
        struct discard_info di = {0};
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
        struct request_queue *q = bdev_get_queue(bdev);
        unsigned int max_discard_blocks =
                        SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
        block_t end = lstart + len;

        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
                                        NULL, lstart,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
                                        &insert_p, &insert_parent, true);
        if (dc)
                prev_dc = dc;

        if (!prev_dc) {
                di.lstart = lstart;
                di.len = next_dc ? next_dc->lstart - lstart : len;
                di.len = min(di.len, len);
                di.start = start;
        }

        while (1) {
                struct rb_node *node;
                bool merged = false;
                struct discard_cmd *tdc = NULL;

                if (prev_dc) {
                        di.lstart = prev_dc->lstart + prev_dc->len;
                        if (di.lstart < lstart)
                                di.lstart = lstart;
                        if (di.lstart >= end)
                                break;

                        if (!next_dc || next_dc->lstart > end)
                                di.len = end - di.lstart;
                        else
                                di.len = next_dc->lstart - di.lstart;
                        di.start = start + di.lstart - lstart;
                }

                if (!di.len)
                        goto next;

                if (prev_dc && prev_dc->state == D_PREP &&
                        prev_dc->bdev == bdev &&
                        __is_discard_back_mergeable(&di, &prev_dc->di,
                                                        max_discard_blocks)) {
                        prev_dc->di.len += di.len;
                        dcc->undiscard_blks += di.len;
                        __relocate_discard_cmd(dcc, prev_dc);
                        di = prev_dc->di;
                        tdc = prev_dc;
                        merged = true;
                }

                if (next_dc && next_dc->state == D_PREP &&
                        next_dc->bdev == bdev &&
                        __is_discard_front_mergeable(&di, &next_dc->di,
                                                        max_discard_blocks)) {
                        next_dc->di.lstart = di.lstart;
                        next_dc->di.len += di.len;
                        next_dc->di.start = di.start;
                        dcc->undiscard_blks += di.len;
                        __relocate_discard_cmd(dcc, next_dc);
                        if (tdc)
                                __remove_discard_cmd(sbi, tdc);
                        merged = true;
                }

                if (!merged) {
                        __insert_discard_tree(sbi, bdev, di.lstart, di.start,
                                                        di.len, NULL, NULL);
                }
 next:
                prev_dc = next_dc;
                if (!prev_dc)
                        break;

                node = rb_next(&prev_dc->rb_node);
                next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
        }
}

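/*
 * Merge example: a pending D_PREP command for [0, len 8) and a new range
 * [8, len 8) on the same bdev back-merge into a single [0, len 16)
 * command, provided the result stays within max_discard_blocks.
 */
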
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
                struct block_device *bdev, block_t blkstart, block_t blklen)
{
        block_t lblkstart = blkstart;

        trace_f2fs_queue_discard(bdev, blkstart, blklen);

        if (f2fs_is_multi_device(sbi)) {
                int devi = f2fs_target_device_index(sbi, blkstart);

                blkstart -= FDEV(devi).start_blk;
        }
        mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
        __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
        mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
        return 0;
}

static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
        struct rb_node **insert_p = NULL, *insert_parent = NULL;
        struct discard_cmd *dc;
        struct blk_plug plug;
        unsigned int pos = dcc->next_pos;
        unsigned int issued = 0;
        bool io_interrupted = false;

        mutex_lock(&dcc->cmd_lock);
        dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
                                        NULL, pos,
                                        (struct rb_entry **)&prev_dc,
                                        (struct rb_entry **)&next_dc,
                                        &insert_p, &insert_parent, true);
        if (!dc)
                dc = next_dc;

        blk_start_plug(&plug);

        while (dc) {
                struct rb_node *node;
                int err = 0;

                if (dc->state != D_PREP)
                        goto next;

                if (dpolicy->io_aware && !is_idle(sbi)) {
                        io_interrupted = true;
                        break;
                }

                dcc->next_pos = dc->lstart + dc->len;
                err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

                if (issued >= dpolicy->max_requests)
                        break;
next:
                node = rb_next(&dc->rb_node);
                if (err)
                        __remove_discard_cmd(sbi, dc);
                dc = rb_entry_safe(node, struct discard_cmd, rb_node);
        }

        blk_finish_plug(&plug);

        if (!dc)
                dcc->next_pos = 0;

        mutex_unlock(&dcc->cmd_lock);

        if (!issued && io_interrupted)
                issued = -1;

        return issued;
}

static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
                                        struct discard_policy *dpolicy)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        struct blk_plug plug;
        int i, issued = 0;
        bool io_interrupted = false;

        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
                if (i + 1 < dpolicy->granularity)
                        break;

                if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
                        return __issue_discard_cmd_orderly(sbi, dpolicy);

                pend_list = &dcc->pend_list[i];

                mutex_lock(&dcc->cmd_lock);
                if (list_empty(pend_list))
                        goto next;
                if (unlikely(dcc->rbtree_check))
                        f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
                                                                &dcc->root));
                blk_start_plug(&plug);
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);

                        if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
                                                                !is_idle(sbi)) {
                                io_interrupted = true;
                                break;
                        }

                        __submit_discard_cmd(sbi, dpolicy, dc, &issued);

                        if (issued >= dpolicy->max_requests)
                                break;
                }
                blk_finish_plug(&plug);
next:
                mutex_unlock(&dcc->cmd_lock);

                if (issued >= dpolicy->max_requests || io_interrupted)
                        break;
        }

        if (!issued && io_interrupted)
                issued = -1;

        return issued;
}

static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        struct list_head *pend_list;
        struct discard_cmd *dc, *tmp;
        int i;
        bool dropped = false;

        mutex_lock(&dcc->cmd_lock);
        for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
                pend_list = &dcc->pend_list[i];
                list_for_each_entry_safe(dc, tmp, pend_list, list) {
                        f2fs_bug_on(sbi, dc->state != D_PREP);
                        __remove_discard_cmd(sbi, dc);
                        dropped = true;
                }
        }
        mutex_unlock(&dcc->cmd_lock);

        return dropped;
}

void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
        __drop_discard_cmd(sbi);
}

static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
                                                        struct discard_cmd *dc)
{
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        unsigned int len = 0;

        wait_for_completion_io(&dc->wait);
        mutex_lock(&dcc->cmd_lock);
        f2fs_bug_on(sbi, dc->state != D_DONE);
        dc->ref--;
        if (!dc->ref) {
                if (!dc->error)
                        len = dc->len;
                __remove_discard_cmd(sbi, dc);
        }
        mutex_unlock(&dcc->cmd_lock);

        return len;
}

1469 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1470                                                 struct discard_policy *dpolicy,
1471                                                 block_t start, block_t end)
1472 {
1473         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1474         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1475                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1476         struct discard_cmd *dc, *tmp;
1477         bool need_wait;
1478         unsigned int trimmed = 0;
1479
1480 next:
1481         need_wait = false;
1482
1483         mutex_lock(&dcc->cmd_lock);
1484         list_for_each_entry_safe(dc, tmp, wait_list, list) {
1485                 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1486                         continue;
1487                 if (dc->len < dpolicy->granularity)
1488                         continue;
1489                 if (dc->state == D_DONE && !dc->ref) {
1490                         wait_for_completion_io(&dc->wait);
1491                         if (!dc->error)
1492                                 trimmed += dc->len;
1493                         __remove_discard_cmd(sbi, dc);
1494                 } else {
1495                         dc->ref++;
1496                         need_wait = true;
1497                         break;
1498                 }
1499         }
1500         mutex_unlock(&dcc->cmd_lock);
1501
1502         if (need_wait) {
1503                 trimmed += __wait_one_discard_bio(sbi, dc);
1504                 goto next;
1505         }
1506
1507         return trimmed;
1508 }
1509
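/*
 * Wait for outstanding discard commands. With a policy, only the
 * matching wait list is drained; with a NULL policy, both the fstrim
 * and the umount wait lists are drained so that nothing is left.
 */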
1510 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1511                                                 struct discard_policy *dpolicy)
1512 {
1513         struct discard_policy dp;
1514         unsigned int discard_blks;
1515
1516         if (dpolicy)
1517                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1518
1519         /* wait all */
1520         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1521         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1522         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1523         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1524
1525         return discard_blks;
1526 }
1527
1528 /* This should be called with the global mutex &sit_i->sentry_lock held */
1529 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1530 {
1531         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1532         struct discard_cmd *dc;
1533         bool need_wait = false;
1534
1535         mutex_lock(&dcc->cmd_lock);
1536         dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1537                                                         NULL, blkaddr);
1538         if (dc) {
1539                 if (dc->state == D_PREP) {
1540                         __punch_discard_cmd(sbi, dc, blkaddr);
1541                 } else {
1542                         dc->ref++;
1543                         need_wait = true;
1544                 }
1545         }
1546         mutex_unlock(&dcc->cmd_lock);
1547
1548         if (need_wait)
1549                 __wait_one_discard_bio(sbi, dc);
1550 }
1551
1552 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1553 {
1554         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1555
1556         if (dcc && dcc->f2fs_issue_discard) {
1557                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1558
1559                 dcc->f2fs_issue_discard = NULL;
1560                 kthread_stop(discard_thread);
1561         }
1562 }
1563
1564 /* This comes from f2fs_put_super */
1565 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1566 {
1567         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1568         struct discard_policy dpolicy;
1569         bool dropped;
1570
1571         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1572                                         dcc->discard_granularity);
1573         __issue_discard_cmd(sbi, &dpolicy);
1574         dropped = __drop_discard_cmd(sbi);
1575
1576         /* just to make sure there are no pending discard commands */
1577         __wait_all_discard_cmd(sbi, NULL);
1578
1579         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1580         return dropped;
1581 }
1582
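/*
 * Background thread that issues queued discard commands. It sleeps
 * on the discard wait queue with a timeout, issues commands under
 * the background policy (escalated to DPOLICY_FORCE in urgent GC
 * mode), and adapts its sleep interval to how much work was done.
 */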
1583 static int issue_discard_thread(void *data)
1584 {
1585         struct f2fs_sb_info *sbi = data;
1586         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1587         wait_queue_head_t *q = &dcc->discard_wait_queue;
1588         struct discard_policy dpolicy;
1589         unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1590         int issued;
1591
1592         set_freezable();
1593
1594         do {
1595                 __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1596                                         dcc->discard_granularity);
1597
1598                 wait_event_interruptible_timeout(*q,
1599                                 kthread_should_stop() || freezing(current) ||
1600                                 dcc->discard_wake,
1601                                 msecs_to_jiffies(wait_ms));
1602
1603                 if (dcc->discard_wake)
1604                         dcc->discard_wake = 0;
1605
1606                 if (try_to_freeze())
1607                         continue;
1608                 if (f2fs_readonly(sbi->sb))
1609                         continue;
1610                 if (kthread_should_stop())
1611                         return 0;
1612                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1613                         wait_ms = dpolicy.max_interval;
1614                         continue;
1615                 }
1616
1617                 if (sbi->gc_mode == GC_URGENT)
1618                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1619
1620                 sb_start_intwrite(sbi->sb);
1621
1622                 issued = __issue_discard_cmd(sbi, &dpolicy);
1623                 if (issued > 0) {
1624                         __wait_all_discard_cmd(sbi, &dpolicy);
1625                         wait_ms = dpolicy.min_interval;
1626                 } else if (issued == -1) {
1627                         wait_ms = dpolicy.mid_interval;
1628                 } else {
1629                         wait_ms = dpolicy.max_interval;
1630                 }
1631
1632                 sb_end_intwrite(sbi->sb);
1633
1634         } while (!kthread_should_stop());
1635         return 0;
1636 }
1637
1638 #ifdef CONFIG_BLK_DEV_ZONED
1639 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1640                 struct block_device *bdev, block_t blkstart, block_t blklen)
1641 {
1642         sector_t sector, nr_sects;
1643         block_t lblkstart = blkstart;
1644         int devi = 0;
1645
1646         if (f2fs_is_multi_device(sbi)) {
1647                 devi = f2fs_target_device_index(sbi, blkstart);
1648                 blkstart -= FDEV(devi).start_blk;
1649         }
1650
1651         /*
1652          * We need to know the type of the zone: for conventional zones,
1653          * use regular discard if the drive supports it. For sequential
1654          * zones, reset the zone write pointer.
1655          */
1656         switch (get_blkz_type(sbi, bdev, blkstart)) {
1657
1658         case BLK_ZONE_TYPE_CONVENTIONAL:
1659                 if (!blk_queue_discard(bdev_get_queue(bdev)))
1660                         return 0;
1661                 return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1662         case BLK_ZONE_TYPE_SEQWRITE_REQ:
1663         case BLK_ZONE_TYPE_SEQWRITE_PREF:
1664                 sector = SECTOR_FROM_BLOCK(blkstart);
1665                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1666
1667                 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1668                                 nr_sects != bdev_zone_sectors(bdev)) {
1669                         f2fs_msg(sbi->sb, KERN_INFO,
1670                                 "(%d) %s: Unaligned discard attempted (block %x + %x)",
1671                                 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1672                                 blkstart, blklen);
1673                         return -EIO;
1674                 }
1675                 trace_f2fs_issue_reset_zone(bdev, blkstart);
1676                 return blkdev_reset_zones(bdev, sector,
1677                                           nr_sects, GFP_NOFS);
1678         default:
1679                 /* Unknown zone type: broken device? */
1680                 return -EIO;
1681         }
1682 }
1683 #endif
1684
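/*
 * On a zoned setup, route the discard through the zone-aware helper;
 * otherwise queue an ordinary discard command.
 */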
1685 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1686                 struct block_device *bdev, block_t blkstart, block_t blklen)
1687 {
1688 #ifdef CONFIG_BLK_DEV_ZONED
1689         if (f2fs_sb_has_blkzoned(sbi->sb) &&
1690                                 bdev_zoned_model(bdev) != BLK_ZONED_NONE)
1691                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1692 #endif
1693         return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1694 }
1695
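/*
 * Issue a discard for [blkstart, blkstart + blklen). On multi-device
 * volumes the range is split at device boundaries. Each block is also
 * marked in its segment's discard_map, decrementing sbi->discard_blks
 * for bits that were not set before.
 */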
1696 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1697                                 block_t blkstart, block_t blklen)
1698 {
1699         sector_t start = blkstart, len = 0;
1700         struct block_device *bdev;
1701         struct seg_entry *se;
1702         unsigned int offset;
1703         block_t i;
1704         int err = 0;
1705
1706         bdev = f2fs_target_device(sbi, blkstart, NULL);
1707
1708         for (i = blkstart; i < blkstart + blklen; i++, len++) {
1709                 if (i != start) {
1710                         struct block_device *bdev2 =
1711                                 f2fs_target_device(sbi, i, NULL);
1712
1713                         if (bdev2 != bdev) {
1714                                 err = __issue_discard_async(sbi, bdev,
1715                                                 start, len);
1716                                 if (err)
1717                                         return err;
1718                                 bdev = bdev2;
1719                                 start = i;
1720                                 len = 0;
1721                         }
1722                 }
1723
1724                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1725                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1726
1727                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
1728                         sbi->discard_blks--;
1729         }
1730
1731         if (len)
1732                 err = __issue_discard_async(sbi, bdev, start, len);
1733         return err;
1734 }
1735
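/*
 * Collect discard candidates of one segment into discard entries.
 * For CP_DISCARD (fstrim), candidates are blocks that are invalid in
 * the checkpointed bitmap and not yet discarded; otherwise they are
 * blocks freed since the last checkpoint. With check_only, just
 * report whether any candidate exists.
 */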
1736 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1737                                                         bool check_only)
1738 {
1739         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1740         int max_blocks = sbi->blocks_per_seg;
1741         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1742         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1743         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1744         unsigned long *discard_map = (unsigned long *)se->discard_map;
1745         unsigned long *dmap = SIT_I(sbi)->tmp_map;
1746         unsigned int start = 0, end = -1;
1747         bool force = (cpc->reason & CP_DISCARD);
1748         struct discard_entry *de = NULL;
1749         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1750         int i;
1751
1752         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1753                 return false;
1754
1755         if (!force) {
1756                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1757                         SM_I(sbi)->dcc_info->nr_discards >=
1758                                 SM_I(sbi)->dcc_info->max_discards)
1759                         return false;
1760         }
1761
1762         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
1763         for (i = 0; i < entries; i++)
1764                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1765                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1766
1767         while (force || SM_I(sbi)->dcc_info->nr_discards <=
1768                                 SM_I(sbi)->dcc_info->max_discards) {
1769                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1770                 if (start >= max_blocks)
1771                         break;
1772
1773                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1774                 if (force && start && end != max_blocks
1775                                         && (end - start) < cpc->trim_minlen)
1776                         continue;
1777
1778                 if (check_only)
1779                         return true;
1780
1781                 if (!de) {
1782                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
1783                                                                 GFP_F2FS_ZERO);
1784                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1785                         list_add_tail(&de->list, head);
1786                 }
1787
1788                 for (i = start; i < end; i++)
1789                         __set_bit_le(i, (void *)de->discard_map);
1790
1791                 SM_I(sbi)->dcc_info->nr_discards += end - start;
1792         }
1793         return false;
1794 }
1795
1796 static void release_discard_addr(struct discard_entry *entry)
1797 {
1798         list_del(&entry->list);
1799         kmem_cache_free(discard_entry_slab, entry);
1800 }
1801
1802 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1803 {
1804         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1805         struct discard_entry *entry, *this;
1806
1807         /* drop caches */
1808         list_for_each_entry_safe(entry, this, head, list)
1809                 release_discard_addr(entry);
1810 }
1811
1812 /*
1813  * f2fs_clear_prefree_segments should be called after the checkpoint is done.
1814  */
1815 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1816 {
1817         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1818         unsigned int segno;
1819
1820         mutex_lock(&dirty_i->seglist_lock);
1821         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1822                 __set_test_and_free(sbi, segno);
1823         mutex_unlock(&dirty_i->seglist_lock);
1824 }
1825
1826 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1827                                                 struct cp_control *cpc)
1828 {
1829         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1830         struct list_head *head = &dcc->entry_list;
1831         struct discard_entry *entry, *this;
1832         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1833         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1834         unsigned int start = 0, end = -1;
1835         unsigned int secno, start_segno;
1836         bool force = (cpc->reason & CP_DISCARD);
1837         bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
1838
1839         mutex_lock(&dirty_i->seglist_lock);
1840
1841         while (1) {
1842                 int i;
1843
1844                 if (need_align && end != -1)
1845                         end--;
1846                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1847                 if (start >= MAIN_SEGS(sbi))
1848                         break;
1849                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1850                                                                 start + 1);
1851
1852                 if (need_align) {
1853                         start = rounddown(start, sbi->segs_per_sec);
1854                         end = roundup(end, sbi->segs_per_sec);
1855                 }
1856
1857                 for (i = start; i < end; i++) {
1858                         if (test_and_clear_bit(i, prefree_map))
1859                                 dirty_i->nr_dirty[PRE]--;
1860                 }
1861
1862                 if (!f2fs_realtime_discard_enable(sbi))
1863                         continue;
1864
1865                 if (force && start >= cpc->trim_start &&
1866                                         (end - 1) <= cpc->trim_end)
1867                         continue;
1868
1869                 if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
1870                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1871                                 (end - start) << sbi->log_blocks_per_seg);
1872                         continue;
1873                 }
1874 next:
1875                 secno = GET_SEC_FROM_SEG(sbi, start);
1876                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
1877                 if (!IS_CURSEC(sbi, secno) &&
1878                         !get_valid_blocks(sbi, start, true))
1879                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1880                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
1881
1882                 start = start_segno + sbi->segs_per_sec;
1883                 if (start < end)
1884                         goto next;
1885                 else
1886                         end = start - 1;
1887         }
1888         mutex_unlock(&dirty_i->seglist_lock);
1889
1890         /* send small discards */
1891         list_for_each_entry_safe(entry, this, head, list) {
1892                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
1893                 bool is_valid = test_bit_le(0, entry->discard_map);
1894
1895 find_next:
1896                 if (is_valid) {
1897                         next_pos = find_next_zero_bit_le(entry->discard_map,
1898                                         sbi->blocks_per_seg, cur_pos);
1899                         len = next_pos - cur_pos;
1900
1901                         if (f2fs_sb_has_blkzoned(sbi->sb) ||
1902                             (force && len < cpc->trim_minlen))
1903                                 goto skip;
1904
1905                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
1906                                                                         len);
1907                         total_len += len;
1908                 } else {
1909                         next_pos = find_next_bit_le(entry->discard_map,
1910                                         sbi->blocks_per_seg, cur_pos);
1911                 }
1912 skip:
1913                 cur_pos = next_pos;
1914                 is_valid = !is_valid;
1915
1916                 if (cur_pos < sbi->blocks_per_seg)
1917                         goto find_next;
1918
1919                 release_discard_addr(entry);
1920                 dcc->nr_discards -= total_len;
1921         }
1922
1923         wake_up_discard_thread(sbi, false);
1924 }
1925
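/*
 * Allocate and initialize the discard command control structure on
 * first use (lists, counters, rb-tree root), then start or restart
 * the "f2fs_discard" kthread. On kthread failure the structure is
 * freed and the error is returned.
 */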
1926 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1927 {
1928         dev_t dev = sbi->sb->s_bdev->bd_dev;
1929         struct discard_cmd_control *dcc;
1930         int err = 0, i;
1931
1932         if (SM_I(sbi)->dcc_info) {
1933                 dcc = SM_I(sbi)->dcc_info;
1934                 goto init_thread;
1935         }
1936
1937         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
1938         if (!dcc)
1939                 return -ENOMEM;
1940
1941         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1942         INIT_LIST_HEAD(&dcc->entry_list);
1943         for (i = 0; i < MAX_PLIST_NUM; i++)
1944                 INIT_LIST_HEAD(&dcc->pend_list[i]);
1945         INIT_LIST_HEAD(&dcc->wait_list);
1946         INIT_LIST_HEAD(&dcc->fstrim_list);
1947         mutex_init(&dcc->cmd_lock);
1948         atomic_set(&dcc->issued_discard, 0);
1949         atomic_set(&dcc->issing_discard, 0);
1950         atomic_set(&dcc->discard_cmd_cnt, 0);
1951         dcc->nr_discards = 0;
1952         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
1953         dcc->undiscard_blks = 0;
1954         dcc->next_pos = 0;
1955         dcc->root = RB_ROOT;
1956         dcc->rbtree_check = false;
1957
1958         init_waitqueue_head(&dcc->discard_wait_queue);
1959         SM_I(sbi)->dcc_info = dcc;
1960 init_thread:
1961         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
1962                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
1963         if (IS_ERR(dcc->f2fs_issue_discard)) {
1964                 err = PTR_ERR(dcc->f2fs_issue_discard);
1965                 kfree(dcc);
1966                 SM_I(sbi)->dcc_info = NULL;
1967                 return err;
1968         }
1969
1970         return err;
1971 }
1972
1973 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
1974 {
1975         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1976
1977         if (!dcc)
1978                 return;
1979
1980         f2fs_stop_discard_thread(sbi);
1981
1982         kfree(dcc);
1983         SM_I(sbi)->dcc_info = NULL;
1984 }
1985
1986 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
1987 {
1988         struct sit_info *sit_i = SIT_I(sbi);
1989
1990         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
1991                 sit_i->dirty_sentries++;
1992                 return false;
1993         }
1994
1995         return true;
1996 }
1997
1998 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
1999                                         unsigned int segno, int modified)
2000 {
2001         struct seg_entry *se = get_seg_entry(sbi, segno);
2002         se->type = type;
2003         if (modified)
2004                 __mark_sit_entry_dirty(sbi, segno);
2005 }
2006
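/*
 * Apply a +1/-1 delta for @blkaddr to its SIT entry: adjust the valid
 * block count and mtime, set or clear the bit in cur_valid_map (with
 * a consistency check against the mirror under CONFIG_F2FS_CHECK_FS),
 * maintain the discard and checkpoint bitmaps, and mark the entry
 * dirty for the next checkpoint.
 */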
2007 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2008 {
2009         struct seg_entry *se;
2010         unsigned int segno, offset;
2011         long int new_vblocks;
2012         bool exist;
2013 #ifdef CONFIG_F2FS_CHECK_FS
2014         bool mir_exist;
2015 #endif
2016
2017         segno = GET_SEGNO(sbi, blkaddr);
2018
2019         se = get_seg_entry(sbi, segno);
2020         new_vblocks = se->valid_blocks + del;
2021         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2022
2023         f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2024                                 (new_vblocks > sbi->blocks_per_seg)));
2025
2026         se->valid_blocks = new_vblocks;
2027         se->mtime = get_mtime(sbi, false);
2028         if (se->mtime > SIT_I(sbi)->max_mtime)
2029                 SIT_I(sbi)->max_mtime = se->mtime;
2030
2031         /* Update valid block bitmap */
2032         if (del > 0) {
2033                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2034 #ifdef CONFIG_F2FS_CHECK_FS
2035                 mir_exist = f2fs_test_and_set_bit(offset,
2036                                                 se->cur_valid_map_mir);
2037                 if (unlikely(exist != mir_exist)) {
2038                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistency found "
2039                                 "when setting bitmap, blk:%u, old bit:%d",
2040                                 blkaddr, exist);
2041                         f2fs_bug_on(sbi, 1);
2042                 }
2043 #endif
2044                 if (unlikely(exist)) {
2045                         f2fs_msg(sbi->sb, KERN_ERR,
2046                                 "Bitmap was wrongly set, blk:%u", blkaddr);
2047                         f2fs_bug_on(sbi, 1);
2048                         se->valid_blocks--;
2049                         del = 0;
2050                 }
2051
2052                 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2053                         sbi->discard_blks--;
2054
2055                 /* don't overwrite by SSR to keep node chain */
2056                 if (IS_NODESEG(se->type)) {
2057                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2058                                 se->ckpt_valid_blocks++;
2059                 }
2060         } else {
2061                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2062 #ifdef CONFIG_F2FS_CHECK_FS
2063                 mir_exist = f2fs_test_and_clear_bit(offset,
2064                                                 se->cur_valid_map_mir);
2065                 if (unlikely(exist != mir_exist)) {
2066                         f2fs_msg(sbi->sb, KERN_ERR, "Inconsistency found "
2067                                 "when clearing bitmap, blk:%u, old bit:%d",
2068                                 blkaddr, exist);
2069                         f2fs_bug_on(sbi, 1);
2070                 }
2071 #endif
2072                 if (unlikely(!exist)) {
2073                         f2fs_msg(sbi->sb, KERN_ERR,
2074                                 "Bitmap was wrongly cleared, blk:%u", blkaddr);
2075                         f2fs_bug_on(sbi, 1);
2076                         se->valid_blocks++;
2077                         del = 0;
2078                 }
2079
2080                 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2081                         sbi->discard_blks++;
2082         }
2083         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2084                 se->ckpt_valid_blocks += del;
2085
2086         __mark_sit_entry_dirty(sbi, segno);
2087
2088         /* update total number of valid blocks to be written in ckpt area */
2089         SIT_I(sbi)->written_valid_blocks += del;
2090
2091         if (sbi->segs_per_sec > 1)
2092                 get_sec_entry(sbi, segno)->valid_blocks += del;
2093 }
2094
2095 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2096 {
2097         unsigned int segno = GET_SEGNO(sbi, addr);
2098         struct sit_info *sit_i = SIT_I(sbi);
2099
2100         f2fs_bug_on(sbi, addr == NULL_ADDR);
2101         if (addr == NEW_ADDR)
2102                 return;
2103
2104         invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2105
2106         /* add it into sit main buffer */
2107         down_write(&sit_i->sentry_lock);
2108
2109         update_sit_entry(sbi, addr, -1);
2110
2111         /* add it into dirty seglist */
2112         locate_dirty_segment(sbi, segno);
2113
2114         up_write(&sit_i->sentry_lock);
2115 }
2116
2117 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2118 {
2119         struct sit_info *sit_i = SIT_I(sbi);
2120         unsigned int segno, offset;
2121         struct seg_entry *se;
2122         bool is_cp = false;
2123
2124         if (!is_valid_data_blkaddr(sbi, blkaddr))
2125                 return true;
2126
2127         down_read(&sit_i->sentry_lock);
2128
2129         segno = GET_SEGNO(sbi, blkaddr);
2130         se = get_seg_entry(sbi, segno);
2131         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2132
2133         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2134                 is_cp = true;
2135
2136         up_read(&sit_i->sentry_lock);
2137
2138         return is_cp;
2139 }
2140
2141 /*
2142  * This function must be called while holding the curseg_mutex lock
2143  */
2144 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2145                                         struct f2fs_summary *sum)
2146 {
2147         struct curseg_info *curseg = CURSEG_I(sbi, type);
2148         void *addr = curseg->sum_blk;
2149         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2150         memcpy(addr, sum, sizeof(struct f2fs_summary));
2151 }
2152
2153 /*
2154  * Calculate the number of current summary pages for writing
2155  */
2156 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2157 {
2158         int valid_sum_count = 0;
2159         int i, sum_in_page;
2160
2161         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2162                 if (sbi->ckpt->alloc_type[i] == SSR)
2163                         valid_sum_count += sbi->blocks_per_seg;
2164                 else {
2165                         if (for_ra)
2166                                 valid_sum_count += le16_to_cpu(
2167                                         F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2168                         else
2169                                 valid_sum_count += curseg_blkoff(sbi, i);
2170                 }
2171         }
2172
2173         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2174                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2175         if (valid_sum_count <= sum_in_page)
2176                 return 1;
2177         else if ((valid_sum_count - sum_in_page) <=
2178                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2179                 return 2;
2180         return 3;
2181 }
2182
2183 /*
2184  * Caller should put this summary page
2185  */
2186 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2187 {
2188         return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
2189 }
2190
2191 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2192                                         void *src, block_t blk_addr)
2193 {
2194         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2195
2196         memcpy(page_address(page), src, PAGE_SIZE);
2197         set_page_dirty(page);
2198         f2fs_put_page(page, 1);
2199 }
2200
2201 static void write_sum_page(struct f2fs_sb_info *sbi,
2202                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2203 {
2204         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2205 }
2206
2207 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2208                                                 int type, block_t blk_addr)
2209 {
2210         struct curseg_info *curseg = CURSEG_I(sbi, type);
2211         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2212         struct f2fs_summary_block *src = curseg->sum_blk;
2213         struct f2fs_summary_block *dst;
2214
2215         dst = (struct f2fs_summary_block *)page_address(page);
2216         memset(dst, 0, PAGE_SIZE);
2217
2218         mutex_lock(&curseg->curseg_mutex);
2219
2220         down_read(&curseg->journal_rwsem);
2221         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2222         up_read(&curseg->journal_rwsem);
2223
2224         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2225         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2226
2227         mutex_unlock(&curseg->curseg_mutex);
2228
2229         set_page_dirty(page);
2230         f2fs_put_page(page, 1);
2231 }
2232
2233 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2234 {
2235         struct curseg_info *curseg = CURSEG_I(sbi, type);
2236         unsigned int segno = curseg->segno + 1;
2237         struct free_segmap_info *free_i = FREE_I(sbi);
2238
2239         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2240                 return !test_bit(segno, free_i->free_segmap);
2241         return 0;
2242 }
2243
2244 /*
2245  * Find a new segment from the free segment bitmap in the right order.
2246  * This function must return successfully; otherwise it is a BUG.
2247  */
2248 static void get_new_segment(struct f2fs_sb_info *sbi,
2249                         unsigned int *newseg, bool new_sec, int dir)
2250 {
2251         struct free_segmap_info *free_i = FREE_I(sbi);
2252         unsigned int segno, secno, zoneno;
2253         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2254         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2255         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2256         unsigned int left_start = hint;
2257         bool init = true;
2258         int go_left = 0;
2259         int i;
2260
2261         spin_lock(&free_i->segmap_lock);
2262
2263         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2264                 segno = find_next_zero_bit(free_i->free_segmap,
2265                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2266                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2267                         goto got_it;
2268         }
2269 find_other_zone:
2270         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2271         if (secno >= MAIN_SECS(sbi)) {
2272                 if (dir == ALLOC_RIGHT) {
2273                         secno = find_next_zero_bit(free_i->free_secmap,
2274                                                         MAIN_SECS(sbi), 0);
2275                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2276                 } else {
2277                         go_left = 1;
2278                         left_start = hint - 1;
2279                 }
2280         }
2281         if (go_left == 0)
2282                 goto skip_left;
2283
2284         while (test_bit(left_start, free_i->free_secmap)) {
2285                 if (left_start > 0) {
2286                         left_start--;
2287                         continue;
2288                 }
2289                 left_start = find_next_zero_bit(free_i->free_secmap,
2290                                                         MAIN_SECS(sbi), 0);
2291                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2292                 break;
2293         }
2294         secno = left_start;
2295 skip_left:
2296         segno = GET_SEG_FROM_SEC(sbi, secno);
2297         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2298
2299         /* give up on finding another zone */
2300         if (!init)
2301                 goto got_it;
2302         if (sbi->secs_per_zone == 1)
2303                 goto got_it;
2304         if (zoneno == old_zoneno)
2305                 goto got_it;
2306         if (dir == ALLOC_LEFT) {
2307                 if (!go_left && zoneno + 1 >= total_zones)
2308                         goto got_it;
2309                 if (go_left && zoneno == 0)
2310                         goto got_it;
2311         }
2312         for (i = 0; i < NR_CURSEG_TYPE; i++)
2313                 if (CURSEG_I(sbi, i)->zone == zoneno)
2314                         break;
2315
2316         if (i < NR_CURSEG_TYPE) {
2317                 /* zone is in use, try another */
2318                 if (go_left)
2319                         hint = zoneno * sbi->secs_per_zone - 1;
2320                 else if (zoneno + 1 >= total_zones)
2321                         hint = 0;
2322                 else
2323                         hint = (zoneno + 1) * sbi->secs_per_zone;
2324                 init = false;
2325                 goto find_other_zone;
2326         }
2327 got_it:
2328         /* set it as dirty segment in free segmap */
2329         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2330         __set_inuse(sbi, segno);
2331         *newseg = segno;
2332         spin_unlock(&free_i->segmap_lock);
2333 }
2334
2335 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2336 {
2337         struct curseg_info *curseg = CURSEG_I(sbi, type);
2338         struct summary_footer *sum_footer;
2339
2340         curseg->segno = curseg->next_segno;
2341         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2342         curseg->next_blkoff = 0;
2343         curseg->next_segno = NULL_SEGNO;
2344
2345         sum_footer = &(curseg->sum_blk->footer);
2346         memset(sum_footer, 0, sizeof(struct summary_footer));
2347         if (IS_DATASEG(type))
2348                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2349         if (IS_NODESEG(type))
2350                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2351         __set_sit_entry_type(sbi, type, curseg->segno, modified);
2352 }
2353
2354 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2355 {
2356         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2357         if (sbi->segs_per_sec != 1)
2358                 return CURSEG_I(sbi, type)->segno;
2359
2360         if (test_opt(sbi, NOHEAP) &&
2361                 (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2362                 return 0;
2363
2364         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2365                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2366
2367         /* find segments from 0 to reuse freed segments */
2368         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2369                 return 0;
2370
2371         return CURSEG_I(sbi, type)->segno;
2372 }
2373
2374 /*
2375  * Allocate a current working segment.
2376  * This function always allocates a free segment in LFS manner.
2377  */
2378 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2379 {
2380         struct curseg_info *curseg = CURSEG_I(sbi, type);
2381         unsigned int segno = curseg->segno;
2382         int dir = ALLOC_LEFT;
2383
2384         write_sum_page(sbi, curseg->sum_blk,
2385                                 GET_SUM_BLOCK(sbi, segno));
2386         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2387                 dir = ALLOC_RIGHT;
2388
2389         if (test_opt(sbi, NOHEAP))
2390                 dir = ALLOC_RIGHT;
2391
2392         segno = __get_next_segno(sbi, type);
2393         get_new_segment(sbi, &segno, new_sec, dir);
2394         curseg->next_segno = segno;
2395         reset_curseg(sbi, type, 1);
2396         curseg->alloc_type = LFS;
2397 }
2398
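/*
 * For SSR, a block may be allocated only if it is free in both the
 * current and the checkpointed bitmaps; find the next such offset
 * from @start and store it in seg->next_blkoff.
 */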
2399 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2400                         struct curseg_info *seg, block_t start)
2401 {
2402         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2403         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2404         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2405         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2406         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2407         int i, pos;
2408
2409         for (i = 0; i < entries; i++)
2410                 target_map[i] = ckpt_map[i] | cur_map[i];
2411
2412         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2413
2414         seg->next_blkoff = pos;
2415 }
2416
2417 /*
2418  * If a segment is written in LFS manner, the next block offset is obtained
2419  * by simply increasing the current block offset. However, if a segment is
2420  * written in SSR manner, it is obtained by calling __next_free_blkoff.
2421  */
2422 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2423                                 struct curseg_info *seg)
2424 {
2425         if (seg->alloc_type == SSR)
2426                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2427         else
2428                 seg->next_blkoff++;
2429 }
2430
2431 /*
2432  * This function always allocates a used segment (from the dirty seglist) in
2433  * SSR manner, so it must recover the existing segment information of valid blocks.
2434  */
2435 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2436 {
2437         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2438         struct curseg_info *curseg = CURSEG_I(sbi, type);
2439         unsigned int new_segno = curseg->next_segno;
2440         struct f2fs_summary_block *sum_node;
2441         struct page *sum_page;
2442
2443         write_sum_page(sbi, curseg->sum_blk,
2444                                 GET_SUM_BLOCK(sbi, curseg->segno));
2445         __set_test_and_inuse(sbi, new_segno);
2446
2447         mutex_lock(&dirty_i->seglist_lock);
2448         __remove_dirty_segment(sbi, new_segno, PRE);
2449         __remove_dirty_segment(sbi, new_segno, DIRTY);
2450         mutex_unlock(&dirty_i->seglist_lock);
2451
2452         reset_curseg(sbi, type, 1);
2453         curseg->alloc_type = SSR;
2454         __next_free_blkoff(sbi, curseg, 0);
2455
2456         sum_page = f2fs_get_sum_page(sbi, new_segno);
2457         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2458         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2459         f2fs_put_page(sum_page, 1);
2460 }
2461
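/*
 * Pick a victim segment for SSR allocation. Try a victim of the same
 * type first; failing that, walk the other temperatures of the same
 * data/node class, scanning from cold to hot when the requested type
 * is warm or cold, and from hot to cold otherwise.
 */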
2462 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2463 {
2464         struct curseg_info *curseg = CURSEG_I(sbi, type);
2465         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2466         unsigned segno = NULL_SEGNO;
2467         int i, cnt;
2468         bool reversed = false;
2469
2470         /* f2fs_need_SSR() already forces us to do this */
2471         if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2472                 curseg->next_segno = segno;
2473                 return 1;
2474         }
2475
2476         /* For node segments, let's do SSR more intensively */
2477         if (IS_NODESEG(type)) {
2478                 if (type >= CURSEG_WARM_NODE) {
2479                         reversed = true;
2480                         i = CURSEG_COLD_NODE;
2481                 } else {
2482                         i = CURSEG_HOT_NODE;
2483                 }
2484                 cnt = NR_CURSEG_NODE_TYPE;
2485         } else {
2486                 if (type >= CURSEG_WARM_DATA) {
2487                         reversed = true;
2488                         i = CURSEG_COLD_DATA;
2489                 } else {
2490                         i = CURSEG_HOT_DATA;
2491                 }
2492                 cnt = NR_CURSEG_DATA_TYPE;
2493         }
2494
2495         for (; cnt-- > 0; reversed ? i-- : i++) {
2496                 if (i == type)
2497                         continue;
2498                 if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2499                         curseg->next_segno = segno;
2500                         return 1;
2501                 }
2502         }
2503         return 0;
2504 }
2505
2506 /*
2507  * Flush out the current segment and replace it with a new segment.
2508  * This function must return successfully; otherwise it is a BUG.
2509  */
2510 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2511                                                 int type, bool force)
2512 {
2513         struct curseg_info *curseg = CURSEG_I(sbi, type);
2514
2515         if (force)
2516                 new_curseg(sbi, type, true);
2517         else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2518                                         type == CURSEG_WARM_NODE)
2519                 new_curseg(sbi, type, false);
2520         else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
2521                 new_curseg(sbi, type, false);
2522         else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2523                 change_curseg(sbi, type);
2524         else
2525                 new_curseg(sbi, type, false);
2526
2527         stat_inc_seg_type(sbi, curseg);
2528 }
2529
2530 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
2531 {
2532         struct curseg_info *curseg;
2533         unsigned int old_segno;
2534         int i;
2535
2536         down_write(&SIT_I(sbi)->sentry_lock);
2537
2538         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2539                 curseg = CURSEG_I(sbi, i);
2540                 old_segno = curseg->segno;
2541                 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2542                 locate_dirty_segment(sbi, old_segno);
2543         }
2544
2545         up_write(&SIT_I(sbi)->sentry_lock);
2546 }
2547
2548 static const struct segment_allocation default_salloc_ops = {
2549         .allocate_segment = allocate_segment_by_default,
2550 };
2551
2552 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2553                                                 struct cp_control *cpc)
2554 {
2555         __u64 trim_start = cpc->trim_start;
2556         bool has_candidate = false;
2557
2558         down_write(&SIT_I(sbi)->sentry_lock);
2559         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2560                 if (add_discard_addrs(sbi, cpc, true)) {
2561                         has_candidate = true;
2562                         break;
2563                 }
2564         }
2565         up_write(&SIT_I(sbi)->sentry_lock);
2566
2567         cpc->trim_start = trim_start;
2568         return has_candidate;
2569 }
2570
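/*
 * fstrim path: walk the discard rb-tree from @start and submit
 * prepared commands in batches of at most max_requests under a block
 * plug. Between batches the lock is dropped, all outstanding commands
 * are waited for, and the walk resumes where it stopped. Returns the
 * number of blocks trimmed.
 */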
2571 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2572                                         struct discard_policy *dpolicy,
2573                                         unsigned int start, unsigned int end)
2574 {
2575         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2576         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2577         struct rb_node **insert_p = NULL, *insert_parent = NULL;
2578         struct discard_cmd *dc;
2579         struct blk_plug plug;
2580         int issued;
2581         unsigned int trimmed = 0;
2582
2583 next:
2584         issued = 0;
2585
2586         mutex_lock(&dcc->cmd_lock);
2587         if (unlikely(dcc->rbtree_check))
2588                 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2589                                                                 &dcc->root));
2590
2591         dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2592                                         NULL, start,
2593                                         (struct rb_entry **)&prev_dc,
2594                                         (struct rb_entry **)&next_dc,
2595                                         &insert_p, &insert_parent, true);
2596         if (!dc)
2597                 dc = next_dc;
2598
2599         blk_start_plug(&plug);
2600
2601         while (dc && dc->lstart <= end) {
2602                 struct rb_node *node;
2603                 int err = 0;
2604
2605                 if (dc->len < dpolicy->granularity)
2606                         goto skip;
2607
2608                 if (dc->state != D_PREP) {
2609                         list_move_tail(&dc->list, &dcc->fstrim_list);
2610                         goto skip;
2611                 }
2612
2613                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2614
2615                 if (issued >= dpolicy->max_requests) {
2616                         start = dc->lstart + dc->len;
2617
2618                         if (err)
2619                                 __remove_discard_cmd(sbi, dc);
2620
2621                         blk_finish_plug(&plug);
2622                         mutex_unlock(&dcc->cmd_lock);
2623                         trimmed += __wait_all_discard_cmd(sbi, NULL);
2624                         congestion_wait(BLK_RW_ASYNC, HZ/50);
2625                         goto next;
2626                 }
2627 skip:
2628                 node = rb_next(&dc->rb_node);
2629                 if (err)
2630                         __remove_discard_cmd(sbi, dc);
2631                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2632
2633                 if (fatal_signal_pending(current))
2634                         break;
2635         }
2636
2637         blk_finish_plug(&plug);
2638         mutex_unlock(&dcc->cmd_lock);
2639
2640         return trimmed;
2641 }
2642
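/*
 * FITRIM entry point. Validate the byte range, convert it to segment
 * numbers (section-aligned in LFS mode), and run a CP_DISCARD
 * checkpoint to file discard candidates. Unless runtime discard is
 * enabled, the candidates in range are then issued and waited for
 * synchronously, and range->len is set to the bytes trimmed.
 */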
2643 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2644 {
2645         __u64 start = F2FS_BYTES_TO_BLK(range->start);
2646         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2647         unsigned int start_segno, end_segno;
2648         block_t start_block, end_block;
2649         struct cp_control cpc;
2650         struct discard_policy dpolicy;
2651         unsigned long long trimmed = 0;
2652         int err = 0;
2653         bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
2654
2655         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2656                 return -EINVAL;
2657
2658         if (end < MAIN_BLKADDR(sbi))
2659                 goto out;
2660
2661         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2662                 f2fs_msg(sbi->sb, KERN_WARNING,
2663                         "Found FS corruption, run fsck to fix.");
2664                 return -EFSCORRUPTED;
2665         }
2666
2667         /* start/end segment number in main_area */
2668         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2669         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2670                                                 GET_SEGNO(sbi, end);
2671         if (need_align) {
2672                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
2673                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2674         }
2675
2676         cpc.reason = CP_DISCARD;
2677         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2678         cpc.trim_start = start_segno;
2679         cpc.trim_end = end_segno;
2680
2681         if (sbi->discard_blks == 0)
2682                 goto out;
2683
2684         mutex_lock(&sbi->gc_mutex);
2685         err = f2fs_write_checkpoint(sbi, &cpc);
2686         mutex_unlock(&sbi->gc_mutex);
2687         if (err)
2688                 goto out;
2689
2690         /*
2691          * We filed discard candidates, but we don't actually need to wait for
2692          * all of them, since they'll be issued at idle time along with the
2693          * runtime discard option. This configuration means the user relies on
2694          * runtime discard or periodic fstrim instead of waiting here.
2695          */
2696         if (f2fs_realtime_discard_enable(sbi))
2697                 goto out;
2698
2699         start_block = START_BLOCK(sbi, start_segno);
2700         end_block = START_BLOCK(sbi, end_segno + 1);
2701
2702         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2703         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2704                                         start_block, end_block);
2705
2706         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2707                                         start_block, end_block);
2708 out:
2709         if (!err)
2710                 range->len = F2FS_BLK_TO_BYTES(trimmed);
2711         return err;
2712 }
2713
2714 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2715 {
2716         struct curseg_info *curseg = CURSEG_I(sbi, type);
2717         if (curseg->next_blkoff < sbi->blocks_per_seg)
2718                 return true;
2719         return false;
2720 }
2721
2722 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2723 {
2724         switch (hint) {
2725         case WRITE_LIFE_SHORT:
2726                 return CURSEG_HOT_DATA;
2727         case WRITE_LIFE_EXTREME:
2728                 return CURSEG_COLD_DATA;
2729         default:
2730                 return CURSEG_WARM_DATA;
2731         }
2732 }
2733
2734 /* This returns write hints for each segment type. These hints will be
2735  * passed down to the block layer. There are mapping tables which depend on
2736  * the mount option 'whint_mode'.
2737  *
2738  * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2739  *
2740  * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2741  *
2742  * User                  F2FS                     Block
2743  * ----                  ----                     -----
2744  *                       META                     WRITE_LIFE_NOT_SET
2745  *                       HOT_NODE                 "
2746  *                       WARM_NODE                "
2747  *                       COLD_NODE                "
2748  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2749  * extension list        "                        "
2750  *
2751  * -- buffered io
2752  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2753  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2754  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2755  * WRITE_LIFE_NONE       "                        "
2756  * WRITE_LIFE_MEDIUM     "                        "
2757  * WRITE_LIFE_LONG       "                        "
2758  *
2759  * -- direct io
2760  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2761  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2762  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2763  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2764  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2765  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2766  *
2767  * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2768  *
2769  * User                  F2FS                     Block
2770  * ----                  ----                     -----
2771  *                       META                     WRITE_LIFE_MEDIUM
2772  *                       HOT_NODE                 WRITE_LIFE_NOT_SET
2773  *                       WARM_NODE                "
2774  *                       COLD_NODE                WRITE_LIFE_NONE
2775  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2776  * extension list        "                        "
2777  *
2778  * -- buffered io
2779  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2780  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2781  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
2782  * WRITE_LIFE_NONE       "                        "
2783  * WRITE_LIFE_MEDIUM     "                        "
2784  * WRITE_LIFE_LONG       "                        "
2785  *
2786  * -- direct io
2787  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2788  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2789  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2790  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2791  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2792  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2793  */
2794
2795 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2796                                 enum page_type type, enum temp_type temp)
2797 {
2798         if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2799                 if (type == DATA) {
2800                         if (temp == WARM)
2801                                 return WRITE_LIFE_NOT_SET;
2802                         else if (temp == HOT)
2803                                 return WRITE_LIFE_SHORT;
2804                         else if (temp == COLD)
2805                                 return WRITE_LIFE_EXTREME;
2806                 } else {
2807                         return WRITE_LIFE_NOT_SET;
2808                 }
2809         } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2810                 if (type == DATA) {
2811                         if (temp == WARM)
2812                                 return WRITE_LIFE_LONG;
2813                         else if (temp == HOT)
2814                                 return WRITE_LIFE_SHORT;
2815                         else if (temp == COLD)
2816                                 return WRITE_LIFE_EXTREME;
2817                 } else if (type == NODE) {
2818                         if (temp == WARM || temp == HOT)
2819                                 return WRITE_LIFE_NOT_SET;
2820                         else if (temp == COLD)
2821                                 return WRITE_LIFE_NONE;
2822                 } else if (type == META) {
2823                         return WRITE_LIFE_MEDIUM;
2824                 }
2825         }
2826         return WRITE_LIFE_NOT_SET;
2827 }
2828
2829 static int __get_segment_type_2(struct f2fs_io_info *fio)
2830 {
2831         if (fio->type == DATA)
2832                 return CURSEG_HOT_DATA;
2833         else
2834                 return CURSEG_HOT_NODE;
2835 }
2836
2837 static int __get_segment_type_4(struct f2fs_io_info *fio)
2838 {
2839         if (fio->type == DATA) {
2840                 struct inode *inode = fio->page->mapping->host;
2841
2842                 if (S_ISDIR(inode->i_mode))
2843                         return CURSEG_HOT_DATA;
2844                 else
2845                         return CURSEG_COLD_DATA;
2846         } else {
2847                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2848                         return CURSEG_WARM_NODE;
2849                 else
2850                         return CURSEG_COLD_NODE;
2851         }
2852 }
2853
2854 static int __get_segment_type_6(struct f2fs_io_info *fio)
2855 {
2856         if (fio->type == DATA) {
2857                 struct inode *inode = fio->page->mapping->host;
2858
2859                 if (is_cold_data(fio->page) || file_is_cold(inode))
2860                         return CURSEG_COLD_DATA;
2861                 if (file_is_hot(inode) ||
2862                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
2863                                 f2fs_is_atomic_file(inode) ||
2864                                 f2fs_is_volatile_file(inode))
2865                         return CURSEG_HOT_DATA;
2866                 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
2867         } else {
2868                 if (IS_DNODE(fio->page))
2869                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
2870                                                 CURSEG_HOT_NODE;
2871                 return CURSEG_COLD_NODE;
2872         }
2873 }
2874
2875 static int __get_segment_type(struct f2fs_io_info *fio)
2876 {
2877         int type = 0;
2878
2879         switch (F2FS_OPTION(fio->sbi).active_logs) {
2880         case 2:
2881                 type = __get_segment_type_2(fio);
2882                 break;
2883         case 4:
2884                 type = __get_segment_type_4(fio);
2885                 break;
2886         case 6:
2887                 type = __get_segment_type_6(fio);
2888                 break;
2889         default:
2890                 f2fs_bug_on(fio->sbi, true);
2891         }
2892
2893         if (IS_HOT(type))
2894                 fio->temp = HOT;
2895         else if (IS_WARM(type))
2896                 fio->temp = WARM;
2897         else
2898                 fio->temp = COLD;
2899         return type;
2900 }
2901
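     /*
      * Reserve the next free block of the current segment of @type for
      * @page: record the summary entry, update the SIT counts of the new
      * (and, if valid, the old) block address, and open a new segment once
      * the current one is full. With @add_list, the fio is queued on the
      * per-temperature write list for later submission.
      */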
2902 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2903                 block_t old_blkaddr, block_t *new_blkaddr,
2904                 struct f2fs_summary *sum, int type,
2905                 struct f2fs_io_info *fio, bool add_list)
2906 {
2907         struct sit_info *sit_i = SIT_I(sbi);
2908         struct curseg_info *curseg = CURSEG_I(sbi, type);
2909
2910         down_read(&SM_I(sbi)->curseg_lock);
2911
2912         mutex_lock(&curseg->curseg_mutex);
2913         down_write(&sit_i->sentry_lock);
2914
2915         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
2916
2917         f2fs_wait_discard_bio(sbi, *new_blkaddr);
2918
2919         /*
2920          * __add_sum_entry must be called while holding curseg_mutex,
2921          * because it updates a summary entry in the current
2922          * summary block.
2923          */
2924         __add_sum_entry(sbi, type, sum);
2925
2926         __refresh_next_blkoff(sbi, curseg);
2927
2928         stat_inc_block_count(sbi, curseg);
2929
2930         /*
2931          * SIT information must be updated before segment allocation,
2932          * since SSR needs the latest valid block information.
2933          */
2934         update_sit_entry(sbi, *new_blkaddr, 1);
2935         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
2936                 update_sit_entry(sbi, old_blkaddr, -1);
2937
2938         if (!__has_curseg_space(sbi, type))
2939                 sit_i->s_ops->allocate_segment(sbi, type, false);
2940
2941         /*
2942          * The segment's dirty status should be updated after segment
2943          * allocation, so we only need to update it once, after the
2944          * previous segment has been closed.
2945          */
2946         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
2947         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
2948
2949         up_write(&sit_i->sentry_lock);
2950
2951         if (page && IS_NODESEG(type)) {
2952                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
2953
2954                 f2fs_inode_chksum_set(sbi, page);
2955         }
2956
2957         if (add_list) {
2958                 struct f2fs_bio_info *io;
2959
2960                 INIT_LIST_HEAD(&fio->list);
2961                 fio->in_list = true;
2962                 fio->retry = false;
2963                 io = sbi->write_io[fio->type] + fio->temp;
2964                 spin_lock(&io->io_lock);
2965                 list_add_tail(&fio->list, &io->io_list);
2966                 spin_unlock(&io->io_lock);
2967         }
2968
2969         mutex_unlock(&curseg->curseg_mutex);
2970
2971         up_read(&SM_I(sbi)->curseg_lock);
2972 }
2973
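     /*
      * On multi-device volumes, record which device the new block lands
      * on, so fsync and checkpoint can flush only the devices that are
      * actually dirty.
      */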
2974 static void update_device_state(struct f2fs_io_info *fio)
2975 {
2976         struct f2fs_sb_info *sbi = fio->sbi;
2977         unsigned int devidx;
2978
2979         if (!f2fs_is_multi_device(sbi))
2980                 return;
2981
2982         devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
2983
2984         /* update device state for fsync */
2985         f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
2986
2987         /* update device state for checkpoint */
2988         if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
2989                 spin_lock(&sbi->dev_lock);
2990                 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
2991                 spin_unlock(&sbi->dev_lock);
2992         }
2993 }
2994
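     /*
      * Allocate a new block for the page and submit the write. If
      * submission could not take the page (fio->retry), the block just
      * allocated becomes the old address and allocation is retried. In
      * LFS mode, cold data writes hold io_order_lock to keep allocation
      * and submission order consistent.
      */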
2995 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
2996 {
2997         int type = __get_segment_type(fio);
2998         bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
2999
3000         if (keep_order)
3001                 down_read(&fio->sbi->io_order_lock);
3002 reallocate:
3003         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3004                         &fio->new_blkaddr, sum, type, fio, true);
3005         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3006                 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3007                                         fio->old_blkaddr, fio->old_blkaddr);
3008
3009         /* write out the dirty page to the block device */
3010         f2fs_submit_page_write(fio);
3011         if (fio->retry) {
3012                 fio->old_blkaddr = fio->new_blkaddr;
3013                 goto reallocate;
3014         }
3015
3016         update_device_state(fio);
3017
3018         if (keep_order)
3019                 up_read(&fio->sbi->io_order_lock);
3020 }
3021
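     /*
      * Meta pages are written in place: the target block address is the
      * page index in the meta mapping. Pages mapping beyond the main area
      * are not tagged REQ_META.
      */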
3022 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3023                                         enum iostat_type io_type)
3024 {
3025         struct f2fs_io_info fio = {
3026                 .sbi = sbi,
3027                 .type = META,
3028                 .temp = HOT,
3029                 .op = REQ_OP_WRITE,
3030                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3031                 .old_blkaddr = page->index,
3032                 .new_blkaddr = page->index,
3033                 .page = page,
3034                 .encrypted_page = NULL,
3035                 .in_list = false,
3036         };
3037
3038         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3039                 fio.op_flags &= ~REQ_META;
3040
3041         set_page_writeback(page);
3042         ClearPageError(page);
3043         f2fs_submit_page_write(&fio);
3044
3045         f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3046 }
3047
3048 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3049 {
3050         struct f2fs_summary sum;
3051
3052         set_summary(&sum, nid, 0, 0);
3053         do_write_page(&sum, fio);
3054
3055         f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3056 }
3057
3058 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3059                                         struct f2fs_io_info *fio)
3060 {
3061         struct f2fs_sb_info *sbi = fio->sbi;
3062         struct f2fs_summary sum;
3063
3064         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3065         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3066         do_write_page(&sum, fio);
3067         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3068
3069         f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3070 }
3071
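     /*
      * In-place update: rewrite the block at its existing address rather
      * than allocating a new one. The target segment must be a data
      * segment; otherwise the filesystem is flagged for fsck and the
      * write is rejected.
      */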
3072 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3073 {
3074         int err;
3075         struct f2fs_sb_info *sbi = fio->sbi;
3076         unsigned int segno;
3077
3078         fio->new_blkaddr = fio->old_blkaddr;
3079         /* i/o temperature is needed for passing down write hints */
3080         __get_segment_type(fio);
3081
3082         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3083
3084         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3085                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3086                 return -EFSCORRUPTED;
3087         }
3088
3089         stat_inc_inplace_blocks(fio->sbi);
3090
3091         err = f2fs_submit_page_bio(fio);
3092         if (!err)
3093                 update_device_state(fio);
3094
3095         f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3096
3097         return err;
3098 }
3099
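     /*
      * Return the index of the active log whose current segment is @segno,
      * or NO_CHECK_TYPE if @segno is not a current segment.
      */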
3100 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3101                                                 unsigned int segno)
3102 {
3103         int i;
3104
3105         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3106                 if (CURSEG_I(sbi, i)->segno == segno)
3107                         break;
3108         }
3109         return i;
3110 }
3111
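     /*
      * Point the matching current segment at @new_blkaddr, record @sum for
      * it and fix up the SIT entries of both the old and new addresses.
      * Used by the recovery flow; with @recover_curseg, the curseg's
      * original position is restored before returning.
      */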
3112 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3113                                 block_t old_blkaddr, block_t new_blkaddr,
3114                                 bool recover_curseg, bool recover_newaddr)
3115 {
3116         struct sit_info *sit_i = SIT_I(sbi);
3117         struct curseg_info *curseg;
3118         unsigned int segno, old_cursegno;
3119         struct seg_entry *se;
3120         int type;
3121         unsigned short old_blkoff;
3122
3123         segno = GET_SEGNO(sbi, new_blkaddr);
3124         se = get_seg_entry(sbi, segno);
3125         type = se->type;
3126
3127         down_write(&SM_I(sbi)->curseg_lock);
3128
3129         if (!recover_curseg) {
3130                 /* for recovery flow */
3131                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3132                         if (old_blkaddr == NULL_ADDR)
3133                                 type = CURSEG_COLD_DATA;
3134                         else
3135                                 type = CURSEG_WARM_DATA;
3136                 }
3137         } else {
3138                 if (IS_CURSEG(sbi, segno)) {
3139                         /* se->type is volatile due to SSR allocation */
3140                         type = __f2fs_get_curseg(sbi, segno);
3141                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3142                 } else {
3143                         type = CURSEG_WARM_DATA;
3144                 }
3145         }
3146
3147         f2fs_bug_on(sbi, !IS_DATASEG(type));
3148         curseg = CURSEG_I(sbi, type);
3149
3150         mutex_lock(&curseg->curseg_mutex);
3151         down_write(&sit_i->sentry_lock);
3152
3153         old_cursegno = curseg->segno;
3154         old_blkoff = curseg->next_blkoff;
3155
3156         /* change the current segment */
3157         if (segno != curseg->segno) {
3158                 curseg->next_segno = segno;
3159                 change_curseg(sbi, type);
3160         }
3161
3162         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3163         __add_sum_entry(sbi, type, sum);
3164
3165         if (!recover_curseg || recover_newaddr)
3166                 update_sit_entry(sbi, new_blkaddr, 1);
3167         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3168                 invalidate_mapping_pages(META_MAPPING(sbi),
3169                                         old_blkaddr, old_blkaddr);
3170                 update_sit_entry(sbi, old_blkaddr, -1);
3171         }
3172
3173         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3174         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3175
3176         locate_dirty_segment(sbi, old_cursegno);
3177
3178         if (recover_curseg) {
3179                 if (old_cursegno != curseg->segno) {
3180                         curseg->next_segno = old_cursegno;
3181                         change_curseg(sbi, type);
3182                 }
3183                 curseg->next_blkoff = old_blkoff;
3184         }
3185
3186         up_write(&sit_i->sentry_lock);
3187         mutex_unlock(&curseg->curseg_mutex);
3188         up_write(&SM_I(sbi)->curseg_lock);
3189 }
3190
3191 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3192                                 block_t old_addr, block_t new_addr,
3193                                 unsigned char version, bool recover_curseg,
3194                                 bool recover_newaddr)
3195 {
3196         struct f2fs_summary sum;
3197
3198         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3199
3200         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3201                                         recover_curseg, recover_newaddr);
3202
3203         f2fs_update_data_blkaddr(dn, new_addr);
3204 }
3205
3206 void f2fs_wait_on_page_writeback(struct page *page,
3207                                 enum page_type type, bool ordered)
3208 {
3209         if (PageWriteback(page)) {
3210                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3211
3212                 f2fs_submit_merged_write_cond(sbi, page->mapping->host,
3213                                                 0, page->index, type);
3214                 if (ordered)
3215                         wait_on_page_writeback(page);
3216                 else
3217                         wait_for_stable_page(page);
3218         }
3219 }
3220
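     /*
      * For inodes that need post-read processing (e.g. encrypted files),
      * the page under writeback is cached in the meta mapping; wait for
      * that write to finish before the block is read back.
      */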
3221 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3222 {
3223         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3224         struct page *cpage;
3225
3226         if (!f2fs_post_read_required(inode))
3227                 return;
3228
3229         if (!is_valid_data_blkaddr(sbi, blkaddr))
3230                 return;
3231
3232         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3233         if (cpage) {
3234                 f2fs_wait_on_page_writeback(cpage, DATA, true);
3235                 f2fs_put_page(cpage, 1);
3236         }
3237 }
3238
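     /*
      * Compacted summaries pack the NAT journal, the SIT journal and the
      * summary entries of the three data logs into as few meta pages as
      * possible, starting at start_sum_block(). Read them back into the
      * in-memory curseg structures.
      */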
3239 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3240 {
3241         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3242         struct curseg_info *seg_i;
3243         unsigned char *kaddr;
3244         struct page *page;
3245         block_t start;
3246         int i, j, offset;
3247
3248         start = start_sum_block(sbi);
3249
3250         page = f2fs_get_meta_page(sbi, start++);
3251         if (IS_ERR(page))
3252                 return PTR_ERR(page);
3253         kaddr = (unsigned char *)page_address(page);
3254
3255         /* Step 1: restore nat cache */
3256         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3257         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3258
3259         /* Step 2: restore sit cache */
3260         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3261         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3262         offset = 2 * SUM_JOURNAL_SIZE;
3263
3264         /* Step 3: restore summary entries */
3265         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3266                 unsigned short blk_off;
3267                 unsigned int segno;
3268
3269                 seg_i = CURSEG_I(sbi, i);
3270                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3271                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3272                 seg_i->next_segno = segno;
3273                 reset_curseg(sbi, i, 0);
3274                 seg_i->alloc_type = ckpt->alloc_type[i];
3275                 seg_i->next_blkoff = blk_off;
3276
3277                 if (seg_i->alloc_type == SSR)
3278                         blk_off = sbi->blocks_per_seg;
3279
3280                 for (j = 0; j < blk_off; j++) {
3281                         struct f2fs_summary *s;
3282                         s = (struct f2fs_summary *)(kaddr + offset);
3283                         seg_i->sum_blk->entries[j] = *s;
3284                         offset += SUMMARY_SIZE;
3285                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3286                                                 SUM_FOOTER_SIZE)
3287                                 continue;
3288
3289                         f2fs_put_page(page, 1);
3290                         page = NULL;
3291
3292                         page = f2fs_get_meta_page(sbi, start++);
3293                         if (IS_ERR(page))
3294                                 return PTR_ERR(page);
3295                         kaddr = (unsigned char *)page_address(page);
3296                         offset = 0;
3297                 }
3298         }
3299         f2fs_put_page(page, 1);
3300         return 0;
3301 }
3302
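     /*
      * Read the full summary block of one log from the checkpoint area
      * and restore the matching curseg: journal, summary entries, footer,
      * segment number and next block offset. Node summaries are rebuilt
      * from the node pages when they were not written at checkpoint time.
      */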
3303 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3304 {
3305         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3306         struct f2fs_summary_block *sum;
3307         struct curseg_info *curseg;
3308         struct page *new;
3309         unsigned short blk_off;
3310         unsigned int segno = 0;
3311         block_t blk_addr = 0;
3312         int err = 0;
3313
3314         /* get segment number and block addr */
3315         if (IS_DATASEG(type)) {
3316                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3317                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3318                                                         CURSEG_HOT_DATA]);
3319                 if (__exist_node_summaries(sbi))
3320                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3321                 else
3322                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3323         } else {
3324                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3325                                                         CURSEG_HOT_NODE]);
3326                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3327                                                         CURSEG_HOT_NODE]);
3328                 if (__exist_node_summaries(sbi))
3329                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3330                                                         type - CURSEG_HOT_NODE);
3331                 else
3332                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3333         }
3334
3335         new = f2fs_get_meta_page(sbi, blk_addr);
3336         if (IS_ERR(new))
3337                 return PTR_ERR(new);
3338         sum = (struct f2fs_summary_block *)page_address(new);
3339
3340         if (IS_NODESEG(type)) {
3341                 if (__exist_node_summaries(sbi)) {
3342                         struct f2fs_summary *ns = &sum->entries[0];
3343                         int i;
3344                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3345                                 ns->version = 0;
3346                                 ns->ofs_in_node = 0;
3347                         }
3348                 } else {
3349                         err = f2fs_restore_node_summary(sbi, segno, sum);
3350                         if (err)
3351                                 goto out;
3352                 }
3353         }
3354
3355         /* set the uncompleted segment as curseg */
3356         curseg = CURSEG_I(sbi, type);
3357         mutex_lock(&curseg->curseg_mutex);
3358
3359         /* update journal info */
3360         down_write(&curseg->journal_rwsem);
3361         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3362         up_write(&curseg->journal_rwsem);
3363
3364         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3365         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3366         curseg->next_segno = segno;
3367         reset_curseg(sbi, type, 0);
3368         curseg->alloc_type = ckpt->alloc_type[type];
3369         curseg->next_blkoff = blk_off;
3370         mutex_unlock(&curseg->curseg_mutex);
3371 out:
3372         f2fs_put_page(new, 1);
3373         return err;
3374 }
3375
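     /*
      * Restore all cursegs from the checkpoint: the compacted data
      * summaries first, when CP_COMPACT_SUM_FLAG is set, then the normal
      * summary blocks of the remaining logs, followed by a sanity check
      * of the journal entry counts.
      */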
3376 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3377 {
3378         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3379         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3380         int type = CURSEG_HOT_DATA;
3381         int err;
3382
3383         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3384                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3385
3386                 if (npages >= 2)
3387                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3388                                                         META_CP, true);
3389
3390                 /* restore for compacted data summary */
3391                 err = read_compacted_summaries(sbi);
3392                 if (err)
3393                         return err;
3394                 type = CURSEG_HOT_NODE;
3395         }
3396
3397         if (__exist_node_summaries(sbi))
3398                 f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3399                                         NR_CURSEG_TYPE - type, META_CP, true);
3400
3401         for (; type <= CURSEG_COLD_NODE; type++) {
3402                 err = read_normal_summaries(sbi, type);
3403                 if (err)
3404                         return err;
3405         }
3406
3407         /* sanity check for summary blocks */
3408         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3409                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
3410                 return -EINVAL;
3411
3412         return 0;
3413 }
3414
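     /*
      * Mirror of read_compacted_summaries(): pack the NAT journal, the
      * SIT journal and the data summary entries into meta pages starting
      * at @blkaddr, and mark them dirty for checkpoint writeback.
      */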
3415 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3416 {
3417         struct page *page;
3418         unsigned char *kaddr;
3419         struct f2fs_summary *summary;
3420         struct curseg_info *seg_i;
3421         int written_size = 0;
3422         int i, j;
3423
3424         page = f2fs_grab_meta_page(sbi, blkaddr++);
3425         kaddr = (unsigned char *)page_address(page);
3426         memset(kaddr, 0, PAGE_SIZE);
3427
3428         /* Step 1: write nat cache */
3429         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3430         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3431         written_size += SUM_JOURNAL_SIZE;
3432
3433         /* Step 2: write sit cache */
3434         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3435         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3436         written_size += SUM_JOURNAL_SIZE;
3437
3438         /* Step 3: write summary entries */
3439         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3440                 unsigned short blkoff;
3441                 seg_i = CURSEG_I(sbi, i);
3442                 if (sbi->ckpt->alloc_type[i] == SSR)
3443                         blkoff = sbi->blocks_per_seg;
3444                 else
3445                         blkoff = curseg_blkoff(sbi, i);
3446
3447                 for (j = 0; j < blkoff; j++) {
3448                         if (!page) {
3449                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
3450                                 kaddr = (unsigned char *)page_address(page);
3451                                 memset(kaddr, 0, PAGE_SIZE);
3452                                 written_size = 0;
3453                         }
3454                         summary = (struct f2fs_summary *)(kaddr + written_size);
3455                         *summary = seg_i->sum_blk->entries[j];
3456                         written_size += SUMMARY_SIZE;
3457
3458                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3459                                                         SUM_FOOTER_SIZE)
3460                                 continue;
3461
3462                         set_page_dirty(page);
3463                         f2fs_put_page(page, 1);
3464                         page = NULL;
3465                 }
3466         }
3467         if (page) {
3468                 set_page_dirty(page);
3469                 f2fs_put_page(page, 1);
3470         }
3471 }
3472
3473 static void write_normal_summaries(struct f2fs_sb_info *sbi,
3474                                         block_t blkaddr, int type)
3475 {
3476         int i, end;
3477         if (IS_DATASEG(type))
3478                 end = type + NR_CURSEG_DATA_TYPE;
3479         else
3480                 end = type + NR_CURSEG_NODE_TYPE;
3481
3482         for (i = type; i < end; i++)
3483                 write_current_sum_page(sbi, i, blkaddr + (i - type));
3484 }
3485
3486 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3487 {
3488         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3489                 write_compacted_summaries(sbi, start_blk);
3490         else
3491                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3492 }
3493
3494 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3495 {
3496         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3497 }
3498
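     /*
      * Look up @val (a nid or a segment number) among the in-journal
      * NAT/SIT entries. Returns the index of the matching entry, the
      * index of a newly reserved slot when @alloc is set and space
      * remains, or -1.
      */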
3499 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3500                                         unsigned int val, int alloc)
3501 {
3502         int i;
3503
3504         if (type == NAT_JOURNAL) {
3505                 for (i = 0; i < nats_in_cursum(journal); i++) {
3506                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3507                                 return i;
3508                 }
3509                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3510                         return update_nats_in_cursum(journal, 1);
3511         } else if (type == SIT_JOURNAL) {
3512                 for (i = 0; i < sits_in_cursum(journal); i++)
3513                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3514                                 return i;
3515                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3516                         return update_sits_in_cursum(journal, 1);
3517         }
3518         return -1;
3519 }
3520
3521 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3522                                         unsigned int segno)
3523 {
3524         return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
3525 }
3526
3527 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3528                                         unsigned int start)
3529 {
3530         struct sit_info *sit_i = SIT_I(sbi);
3531         struct page *page;
3532         pgoff_t src_off, dst_off;
3533
3534         src_off = current_sit_addr(sbi, start);
3535         dst_off = next_sit_addr(sbi, src_off);
3536
3537         page = f2fs_grab_meta_page(sbi, dst_off);
3538         seg_info_to_sit_page(sbi, page, start);
3539
3540         set_page_dirty(page);
3541         set_to_next_sit(sit_i, start);
3542
3543         return page;
3544 }
3545
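     /*
      * A sit_entry_set groups the dirty segments that share one on-disk
      * SIT block. adjust_sit_entry_set() keeps the list ordered by entry
      * count, so the smallest sets are considered for the journal first
      * when the entries are flushed.
      */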
3546 static struct sit_entry_set *grab_sit_entry_set(void)
3547 {
3548         struct sit_entry_set *ses =
3549                         f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3550
3551         ses->entry_cnt = 0;
3552         INIT_LIST_HEAD(&ses->set_list);
3553         return ses;
3554 }
3555
3556 static void release_sit_entry_set(struct sit_entry_set *ses)
3557 {
3558         list_del(&ses->set_list);
3559         kmem_cache_free(sit_entry_set_slab, ses);
3560 }
3561
3562 static void adjust_sit_entry_set(struct sit_entry_set *ses,
3563                                                 struct list_head *head)
3564 {
3565         struct sit_entry_set *next = ses;
3566
3567         if (list_is_last(&ses->set_list, head))
3568                 return;
3569
3570         list_for_each_entry_continue(next, head, set_list)
3571                 if (ses->entry_cnt <= next->entry_cnt)
3572                         break;
3573
3574         list_move_tail(&ses->set_list, &next->set_list);
3575 }
3576
3577 static void add_sit_entry(unsigned int segno, struct list_head *head)
3578 {
3579         struct sit_entry_set *ses;
3580         unsigned int start_segno = START_SEGNO(segno);
3581
3582         list_for_each_entry(ses, head, set_list) {
3583                 if (ses->start_segno == start_segno) {
3584                         ses->entry_cnt++;
3585                         adjust_sit_entry_set(ses, head);
3586                         return;
3587                 }
3588         }
3589
3590         ses = grab_sit_entry_set();
3591
3592         ses->start_segno = start_segno;
3593         ses->entry_cnt++;
3594         list_add(&ses->set_list, head);
3595 }
3596
3597 static void add_sits_in_set(struct f2fs_sb_info *sbi)
3598 {
3599         struct f2fs_sm_info *sm_info = SM_I(sbi);
3600         struct list_head *set_list = &sm_info->sit_entry_set;
3601         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3602         unsigned int segno;
3603
3604         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3605                 add_sit_entry(segno, set_list);
3606 }
3607
3608 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3609 {
3610         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3611         struct f2fs_journal *journal = curseg->journal;
3612         int i;
3613
3614         down_write(&curseg->journal_rwsem);
3615         for (i = 0; i < sits_in_cursum(journal); i++) {
3616                 unsigned int segno;
3617                 bool dirtied;
3618
3619                 segno = le32_to_cpu(segno_in_journal(journal, i));
3620                 dirtied = __mark_sit_entry_dirty(sbi, segno);
3621
3622                 if (!dirtied)
3623                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3624         }
3625         update_sits_in_cursum(journal, -i);
3626         up_write(&curseg->journal_rwsem);
3627 }
3628
3629 /*
3630  * The checkpoint path calls this function, which flushes the SIT entries
3631  * (including the sit journal) and moves prefree segments to free segments.
3632  */
3633 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3634 {
3635         struct sit_info *sit_i = SIT_I(sbi);
3636         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3637         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3638         struct f2fs_journal *journal = curseg->journal;
3639         struct sit_entry_set *ses, *tmp;
3640         struct list_head *head = &SM_I(sbi)->sit_entry_set;
3641         bool to_journal = true;
3642         struct seg_entry *se;
3643
3644         down_write(&sit_i->sentry_lock);
3645
3646         if (!sit_i->dirty_sentries)
3647                 goto out;
3648
3649         /*
3650          * temporarily add and account for the sit entries of the dirty
3651          * bitmap in sit entry sets
3652          */
3653         add_sits_in_set(sbi);
3654
3655         /*
3656          * if there is not enough space in the journal to store all dirty
3657          * sit entries, remove them from the journal and add and account
3658          * for them in the sit entry sets instead.
3659          */
3660         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3661                 remove_sits_in_journal(sbi);
3662
3663         /*
3664          * sit entries are flushed in one of two ways:
3665          * #1, to the journal in the current cold data summary block.
3666          * #2, to the on-disk sit pages.
3667          */
3668         list_for_each_entry_safe(ses, tmp, head, set_list) {
3669                 struct page *page = NULL;
3670                 struct f2fs_sit_block *raw_sit = NULL;
3671                 unsigned int start_segno = ses->start_segno;
3672                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3673                                                 (unsigned long)MAIN_SEGS(sbi));
3674                 unsigned int segno = start_segno;
3675
3676                 if (to_journal &&
3677                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3678                         to_journal = false;
3679
3680                 if (to_journal) {
3681                         down_write(&curseg->journal_rwsem);
3682                 } else {
3683                         page = get_next_sit_page(sbi, start_segno);
3684                         raw_sit = page_address(page);
3685                 }
3686
3687                 /* flush dirty sit entries in region of current sit set */
3688                 for_each_set_bit_from(segno, bitmap, end) {
3689                         int offset, sit_offset;
3690
3691                         se = get_seg_entry(sbi, segno);
3692 #ifdef CONFIG_F2FS_CHECK_FS
3693                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3694                                                 SIT_VBLOCK_MAP_SIZE))
3695                                 f2fs_bug_on(sbi, 1);
3696 #endif
3697
3698                         /* add discard candidates */
3699                         if (!(cpc->reason & CP_DISCARD)) {
3700                                 cpc->trim_start = segno;
3701                                 add_discard_addrs(sbi, cpc, false);
3702                         }
3703
3704                         if (to_journal) {
3705                                 offset = f2fs_lookup_journal_in_cursum(journal,
3706                                                         SIT_JOURNAL, segno, 1);
3707                                 f2fs_bug_on(sbi, offset < 0);
3708                                 segno_in_journal(journal, offset) =
3709                                                         cpu_to_le32(segno);
3710                                 seg_info_to_raw_sit(se,
3711                                         &sit_in_journal(journal, offset));
3712                                 check_block_count(sbi, segno,
3713                                         &sit_in_journal(journal, offset));
3714                         } else {
3715                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3716                                 seg_info_to_raw_sit(se,
3717                                                 &raw_sit->entries[sit_offset]);
3718                                 check_block_count(sbi, segno,
3719                                                 &raw_sit->entries[sit_offset]);
3720                         }
3721
3722                         __clear_bit(segno, bitmap);
3723                         sit_i->dirty_sentries--;
3724                         ses->entry_cnt--;
3725                 }
3726
3727                 if (to_journal)
3728                         up_write(&curseg->journal_rwsem);
3729                 else
3730                         f2fs_put_page(page, 1);
3731
3732                 f2fs_bug_on(sbi, ses->entry_cnt);
3733                 release_sit_entry_set(ses);
3734         }
3735
3736         f2fs_bug_on(sbi, !list_empty(head));
3737         f2fs_bug_on(sbi, sit_i->dirty_sentries);
3738 out:
3739         if (cpc->reason & CP_DISCARD) {
3740                 __u64 trim_start = cpc->trim_start;
3741
3742                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3743                         add_discard_addrs(sbi, cpc, false);
3744
3745                 cpc->trim_start = trim_start;
3746         }
3747         up_write(&sit_i->sentry_lock);
3748
3749         set_prefree_as_free_segments(sbi);
3750 }
3751
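     /*
      * Build the in-memory SIT: one seg_entry per segment with its
      * validity and discard bitmaps, optional per-section entries, and a
      * private copy of the SIT bitmap taken from the checkpoint pack.
      */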
3752 static int build_sit_info(struct f2fs_sb_info *sbi)
3753 {
3754         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
3755         struct sit_info *sit_i;
3756         unsigned int sit_segs, start;
3757         char *src_bitmap;
3758         unsigned int bitmap_size;
3759
3760         /* allocate memory for SIT information */
3761         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
3762         if (!sit_i)
3763                 return -ENOMEM;
3764
3765         SM_I(sbi)->sit_info = sit_i;
3766
3767         sit_i->sentries =
3768                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
3769                                               MAIN_SEGS(sbi)),
3770                               GFP_KERNEL);
3771         if (!sit_i->sentries)
3772                 return -ENOMEM;
3773
3774         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3775         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
3776                                                                 GFP_KERNEL);
3777         if (!sit_i->dirty_sentries_bitmap)
3778                 return -ENOMEM;
3779
3780         for (start = 0; start < MAIN_SEGS(sbi); start++) {
3781                 sit_i->sentries[start].cur_valid_map
3782                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3783                 sit_i->sentries[start].ckpt_valid_map
3784                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3785                 if (!sit_i->sentries[start].cur_valid_map ||
3786                                 !sit_i->sentries[start].ckpt_valid_map)
3787                         return -ENOMEM;
3788
3789 #ifdef CONFIG_F2FS_CHECK_FS
3790                 sit_i->sentries[start].cur_valid_map_mir
3791                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3792                 if (!sit_i->sentries[start].cur_valid_map_mir)
3793                         return -ENOMEM;
3794 #endif
3795
3796                 sit_i->sentries[start].discard_map
3797                         = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
3798                                                         GFP_KERNEL);
3799                 if (!sit_i->sentries[start].discard_map)
3800                         return -ENOMEM;
3801         }
3802
3803         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3804         if (!sit_i->tmp_map)
3805                 return -ENOMEM;
3806
3807         if (sbi->segs_per_sec > 1) {
3808                 sit_i->sec_entries =
3809                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
3810                                                       MAIN_SECS(sbi)),
3811                                       GFP_KERNEL);
3812                 if (!sit_i->sec_entries)
3813                         return -ENOMEM;
3814         }
3815
3816         /* get information related to SIT */
3817         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
3818
3819         /* set up the SIT bitmap from the checkpoint pack */
3820         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
3821         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
3822
3823         sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3824         if (!sit_i->sit_bitmap)
3825                 return -ENOMEM;
3826
3827 #ifdef CONFIG_F2FS_CHECK_FS
3828         sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
3829         if (!sit_i->sit_bitmap_mir)
3830                 return -ENOMEM;
3831 #endif
3832
3833         /* init SIT information */
3834         sit_i->s_ops = &default_salloc_ops;
3835
3836         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
3837         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
3838         sit_i->written_valid_blocks = 0;
3839         sit_i->bitmap_size = bitmap_size;
3840         sit_i->dirty_sentries = 0;
3841         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
3842         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3843         sit_i->mounted_time = ktime_get_real_seconds();
3844         init_rwsem(&sit_i->sentry_lock);
3845         return 0;
3846 }
3847
3848 static int build_free_segmap(struct f2fs_sb_info *sbi)
3849 {
3850         struct free_segmap_info *free_i;
3851         unsigned int bitmap_size, sec_bitmap_size;
3852
3853         /* allocate memory for free segmap information */
3854         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
3855         if (!free_i)
3856                 return -ENOMEM;
3857
3858         SM_I(sbi)->free_info = free_i;
3859
3860         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
3861         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
3862         if (!free_i->free_segmap)
3863                 return -ENOMEM;
3864
3865         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
3866         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
3867         if (!free_i->free_secmap)
3868                 return -ENOMEM;
3869
3870         /* set all segments as dirty temporarily */
3871         memset(free_i->free_segmap, 0xff, bitmap_size);
3872         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
3873
3874         /* init free segmap information */
3875         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
3876         free_i->free_segments = 0;
3877         free_i->free_sections = 0;
3878         spin_lock_init(&free_i->segmap_lock);
3879         return 0;
3880 }
3881
3882 static int build_curseg(struct f2fs_sb_info *sbi)
3883 {
3884         struct curseg_info *array;
3885         int i;
3886
3887         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
3888                              GFP_KERNEL);
3889         if (!array)
3890                 return -ENOMEM;
3891
3892         SM_I(sbi)->curseg_array = array;
3893
3894         for (i = 0; i < NR_CURSEG_TYPE; i++) {
3895                 mutex_init(&array[i].curseg_mutex);
3896                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
3897                 if (!array[i].sum_blk)
3898                         return -ENOMEM;
3899                 init_rwsem(&array[i].journal_rwsem);
3900                 array[i].journal = f2fs_kzalloc(sbi,
3901                                 sizeof(struct f2fs_journal), GFP_KERNEL);
3902                 if (!array[i].journal)
3903                         return -ENOMEM;
3904                 array[i].segno = NULL_SEGNO;
3905                 array[i].next_blkoff = 0;
3906         }
3907         return restore_curseg_summaries(sbi);
3908 }
3909
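     /*
      * Load all SIT entries: read the on-disk SIT blocks with readahead,
      * overlay the newer entries cached in the SIT journal, and verify
      * that the summed node block count matches the checkpoint.
      */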
3910 static int build_sit_entries(struct f2fs_sb_info *sbi)
3911 {
3912         struct sit_info *sit_i = SIT_I(sbi);
3913         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3914         struct f2fs_journal *journal = curseg->journal;
3915         struct seg_entry *se;
3916         struct f2fs_sit_entry sit;
3917         int sit_blk_cnt = SIT_BLK_CNT(sbi);
3918         unsigned int i, start, end;
3919         unsigned int readed, start_blk = 0;
3920         int err = 0;
3921         block_t total_node_blocks = 0;
3922
3923         do {
3924                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
3925                                                         META_SIT, true);
3926
3927                 start = start_blk * sit_i->sents_per_block;
3928                 end = (start_blk + readed) * sit_i->sents_per_block;
3929
3930                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
3931                         struct f2fs_sit_block *sit_blk;
3932                         struct page *page;
3933
3934                         se = &sit_i->sentries[start];
3935                         page = get_current_sit_page(sbi, start);
3936                         sit_blk = (struct f2fs_sit_block *)page_address(page);
3937                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
3938                         f2fs_put_page(page, 1);
3939
3940                         err = check_block_count(sbi, start, &sit);
3941                         if (err)
3942                                 return err;
3943                         seg_info_from_raw_sit(se, &sit);
3944                         if (IS_NODESEG(se->type))
3945                                 total_node_blocks += se->valid_blocks;
3946
3947                         /* build the discard map only once */
3948                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
3949                                 memset(se->discard_map, 0xff,
3950                                         SIT_VBLOCK_MAP_SIZE);
3951                         } else {
3952                                 memcpy(se->discard_map,
3953                                         se->cur_valid_map,
3954                                         SIT_VBLOCK_MAP_SIZE);
3955                                 sbi->discard_blks +=
3956                                         sbi->blocks_per_seg -
3957                                         se->valid_blocks;
3958                         }
3959
3960                         if (sbi->segs_per_sec > 1)
3961                                 get_sec_entry(sbi, start)->valid_blocks +=
3962                                                         se->valid_blocks;
3963                 }
3964                 start_blk += readed;
3965         } while (start_blk < sit_blk_cnt);
3966
3967         down_read(&curseg->journal_rwsem);
3968         for (i = 0; i < sits_in_cursum(journal); i++) {
3969                 unsigned int old_valid_blocks;
3970
3971                 start = le32_to_cpu(segno_in_journal(journal, i));
3972                 if (start >= MAIN_SEGS(sbi)) {
3973                         f2fs_msg(sbi->sb, KERN_ERR,
3974                                         "Wrong journal entry on segno %u",
3975                                         start);
3976                         set_sbi_flag(sbi, SBI_NEED_FSCK);
3977                         err = -EFSCORRUPTED;
3978                         break;
3979                 }
3980
3981                 se = &sit_i->sentries[start];
3982                 sit = sit_in_journal(journal, i);
3983
3984                 old_valid_blocks = se->valid_blocks;
3985                 if (IS_NODESEG(se->type))
3986                         total_node_blocks -= old_valid_blocks;
3987
3988                 err = check_block_count(sbi, start, &sit);
3989                 if (err)
3990                         break;
3991                 seg_info_from_raw_sit(se, &sit);
3992                 if (IS_NODESEG(se->type))
3993                         total_node_blocks += se->valid_blocks;
3994
3995                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
3996                         memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
3997                 } else {
3998                         memcpy(se->discard_map, se->cur_valid_map,
3999                                                 SIT_VBLOCK_MAP_SIZE);
4000                         sbi->discard_blks += old_valid_blocks;
4001                         sbi->discard_blks -= se->valid_blocks;
4002                 }
4003
4004                 if (sbi->segs_per_sec > 1) {
4005                         get_sec_entry(sbi, start)->valid_blocks +=
4006                                                         se->valid_blocks;
4007                         get_sec_entry(sbi, start)->valid_blocks -=
4008                                                         old_valid_blocks;
4009                 }
4010         }
4011         up_read(&curseg->journal_rwsem);
4012
4013         if (!err && total_node_blocks != valid_node_count(sbi)) {
4014                 f2fs_msg(sbi->sb, KERN_ERR,
4015                         "SIT is corrupted node# %u vs %u",
4016                         total_node_blocks, valid_node_count(sbi));
4017                 set_sbi_flag(sbi, SBI_NEED_FSCK);
4018                 err = -EFSCORRUPTED;
4019         }
4020
4021         return err;
4022 }
4023
4024 static void init_free_segmap(struct f2fs_sb_info *sbi)
4025 {
4026         unsigned int start;
4027         int type;
4028
4029         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4030                 struct seg_entry *sentry = get_seg_entry(sbi, start);
4031                 if (!sentry->valid_blocks)
4032                         __set_free(sbi, start);
4033                 else
4034                         SIT_I(sbi)->written_valid_blocks +=
4035                                                 sentry->valid_blocks;
4036         }
4037
4038         /* mark the current segments as in use */
4039         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4040                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4041                 __set_test_and_inuse(sbi, curseg_t->segno);
4042         }
4043 }
4044
4045 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4046 {
4047         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4048         struct free_segmap_info *free_i = FREE_I(sbi);
4049         unsigned int segno = 0, offset = 0;
4050         unsigned short valid_blocks;
4051
4052         while (1) {
4053                 /* find dirty segment based on free segmap */
4054                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4055                 if (segno >= MAIN_SEGS(sbi))
4056                         break;
4057                 offset = segno + 1;
4058                 valid_blocks = get_valid_blocks(sbi, segno, false);
4059                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4060                         continue;
4061                 if (valid_blocks > sbi->blocks_per_seg) {
4062                         f2fs_bug_on(sbi, 1);
4063                         continue;
4064                 }
4065                 mutex_lock(&dirty_i->seglist_lock);
4066                 __locate_dirty_segment(sbi, segno, DIRTY);
4067                 mutex_unlock(&dirty_i->seglist_lock);
4068         }
4069 }
4070
4071 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4072 {
4073         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4074         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4075
4076         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4077         if (!dirty_i->victim_secmap)
4078                 return -ENOMEM;
4079         return 0;
4080 }
4081
4082 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4083 {
4084         struct dirty_seglist_info *dirty_i;
4085         unsigned int bitmap_size, i;
4086
4087         /* allocate memory for dirty segments list information */
4088         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4089                                                                 GFP_KERNEL);
4090         if (!dirty_i)
4091                 return -ENOMEM;
4092
4093         SM_I(sbi)->dirty_info = dirty_i;
4094         mutex_init(&dirty_i->seglist_lock);
4095
4096         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4097
4098         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4099                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4100                                                                 GFP_KERNEL);
4101                 if (!dirty_i->dirty_segmap[i])
4102                         return -ENOMEM;
4103         }
4104
4105         init_dirty_segmap(sbi);
4106         return init_victim_secmap(sbi);
4107 }
4108
4109 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4110 {
4111         int i;
4112
4113         /*
4114          * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4115          * In LFS curseg, all blkaddr after .next_blkoff should be unused.
4116          */
4117         for (i = 0; i < NO_CHECK_TYPE; i++) {
4118                 struct curseg_info *curseg = CURSEG_I(sbi, i);
4119                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4120                 unsigned int blkofs = curseg->next_blkoff;
4121
4122                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4123                         goto out;
4124
4125                 if (curseg->alloc_type == SSR)
4126                         continue;
4127
4128                 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4129                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4130                                 continue;
4131 out:
4132                         f2fs_msg(sbi->sb, KERN_ERR,
4133                                 "Current segment's next free block offset is "
4134                                 "inconsistent with bitmap, logtype:%u, "
4135                                 "segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4136                                 i, curseg->segno, curseg->alloc_type,
4137                                 curseg->next_blkoff, blkofs);
4138                         return -EFSCORRUPTED;
4139                 }
4140         }
4141         return 0;
4142 }
4143
4144 /*
4145  * Update min, max modified time for cost-benefit GC algorithm
4146  */
4147 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4148 {
4149         struct sit_info *sit_i = SIT_I(sbi);
4150         unsigned int segno;
4151
4152         down_write(&sit_i->sentry_lock);
4153
4154         sit_i->min_mtime = ULLONG_MAX;
4155
4156         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4157                 unsigned int i;
4158                 unsigned long long mtime = 0;
4159
4160                 for (i = 0; i < sbi->segs_per_sec; i++)
4161                         mtime += get_seg_entry(sbi, segno + i)->mtime;
4162
4163                 mtime = div_u64(mtime, sbi->segs_per_sec);
4164
4165                 if (sit_i->min_mtime > mtime)
4166                         sit_i->min_mtime = mtime;
4167         }
4168         sit_i->max_mtime = get_mtime(sbi, false);
4169         up_write(&sit_i->sentry_lock);
4170 }
4171
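     /*
      * Build the segment manager from the superblock and checkpoint:
      * flush and discard control, SIT information, free and dirty
      * segmaps and the current segments, followed by consistency checks
      * on the restored state.
      */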
4172 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4173 {
4174         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4175         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4176         struct f2fs_sm_info *sm_info;
4177         int err;
4178
4179         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4180         if (!sm_info)
4181                 return -ENOMEM;
4182
4183         /* init sm info */
4184         sbi->sm_info = sm_info;
4185         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4186         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4187         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4188         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4189         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4190         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4191         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4192         sm_info->rec_prefree_segments = sm_info->main_segments *
4193                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4194         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4195                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4196
4197         if (!test_opt(sbi, LFS))
4198                 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4199         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4200         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4201         sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4202         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4203         sm_info->min_ssr_sections = reserved_sections(sbi);
4204
4205         INIT_LIST_HEAD(&sm_info->sit_entry_set);
4206
4207         init_rwsem(&sm_info->curseg_lock);
4208
4209         if (!f2fs_readonly(sbi->sb)) {
4210                 err = f2fs_create_flush_cmd_control(sbi);
4211                 if (err)
4212                         return err;
4213         }
4214
4215         err = create_discard_cmd_control(sbi);
4216         if (err)
4217                 return err;
4218
4219         err = build_sit_info(sbi);
4220         if (err)
4221                 return err;
4222         err = build_free_segmap(sbi);
4223         if (err)
4224                 return err;
4225         err = build_curseg(sbi);
4226         if (err)
4227                 return err;
4228
4229         /* reinit free segmap based on SIT */
4230         err = build_sit_entries(sbi);
4231         if (err)
4232                 return err;
4233
4234         init_free_segmap(sbi);
4235         err = build_dirty_segmap(sbi);
4236         if (err)
4237                 return err;
4238
4239         err = sanity_check_curseg(sbi);
4240         if (err)
4241                 return err;
4242
4243         init_min_max_mtime(sbi);
4244         return 0;
4245 }
4246
4247 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4248                 enum dirty_type dirty_type)
4249 {
4250         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4251
4252         mutex_lock(&dirty_i->seglist_lock);
4253         kvfree(dirty_i->dirty_segmap[dirty_type]);
4254         dirty_i->nr_dirty[dirty_type] = 0;
4255         mutex_unlock(&dirty_i->seglist_lock);
4256 }
4257
4258 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4259 {
4260         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4261         kvfree(dirty_i->victim_secmap);
4262 }
4263
4264 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4265 {
4266         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4267         int i;
4268
4269         if (!dirty_i)
4270                 return;
4271
4272         /* discard pre-free/dirty segments list */
4273         for (i = 0; i < NR_DIRTY_TYPE; i++)
4274                 discard_dirty_segmap(sbi, i);
4275
4276         destroy_victim_secmap(sbi);
4277         SM_I(sbi)->dirty_info = NULL;
4278         kfree(dirty_i);
4279 }
4280
4281 static void destroy_curseg(struct f2fs_sb_info *sbi)
4282 {
4283         struct curseg_info *array = SM_I(sbi)->curseg_array;
4284         int i;
4285
4286         if (!array)
4287                 return;
4288         SM_I(sbi)->curseg_array = NULL;
4289         for (i = 0; i < NR_CURSEG_TYPE; i++) {
4290                 kfree(array[i].sum_blk);
4291                 kfree(array[i].journal);
4292         }
4293         kfree(array);
4294 }
4295
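/* Free the free segment/section bitmaps and the free_segmap_info. */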
4296 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4297 {
4298         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4299         if (!free_i)
4300                 return;
4301         SM_I(sbi)->free_info = NULL;
4302         kvfree(free_i->free_segmap);
4303         kvfree(free_i->free_secmap);
4304         kfree(free_i);
4305 }
4306
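/*
 * Tear down the in-memory SIT: the per-segment validity and discard
 * bitmaps, the segment/section entry arrays, and the SIT bitmap copies
 * (mirror bitmaps exist only under CONFIG_F2FS_CHECK_FS).
 */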
4307 static void destroy_sit_info(struct f2fs_sb_info *sbi)
4308 {
4309         struct sit_info *sit_i = SIT_I(sbi);
4310         unsigned int start;
4311
4312         if (!sit_i)
4313                 return;
4314
4315         if (sit_i->sentries) {
4316                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4317                         kfree(sit_i->sentries[start].cur_valid_map);
4318 #ifdef CONFIG_F2FS_CHECK_FS
4319                         kfree(sit_i->sentries[start].cur_valid_map_mir);
4320 #endif
4321                         kfree(sit_i->sentries[start].ckpt_valid_map);
4322                         kfree(sit_i->sentries[start].discard_map);
4323                 }
4324         }
4325         kfree(sit_i->tmp_map);
4326
4327         kvfree(sit_i->sentries);
4328         kvfree(sit_i->sec_entries);
4329         kvfree(sit_i->dirty_sentries_bitmap);
4330
4331         SM_I(sbi)->sit_info = NULL;
4332         kfree(sit_i->sit_bitmap);
4333 #ifdef CONFIG_F2FS_CHECK_FS
4334         kfree(sit_i->sit_bitmap_mir);
4335 #endif
4336         kfree(sit_i);
4337 }
4338
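/*
 * Shut down the segment manager: stop the flush and discard threads
 * first, then release dirty/curseg/free/SIT state in the reverse order
 * of f2fs_build_segment_manager().
 */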
4339 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4340 {
4341         struct f2fs_sm_info *sm_info = SM_I(sbi);
4342
4343         if (!sm_info)
4344                 return;
4345         f2fs_destroy_flush_cmd_control(sbi, true);
4346         destroy_discard_cmd_control(sbi);
4347         destroy_dirty_segmap(sbi);
4348         destroy_curseg(sbi);
4349         destroy_free_segmap(sbi);
4350         destroy_sit_info(sbi);
4351         sbi->sm_info = NULL;
4352         kfree(sm_info);
4353 }
4354
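/*
 * Create the slab caches shared by all f2fs instances.  Any allocation
 * failure unwinds the caches created so far and returns -ENOMEM; called
 * once at module init and paired with
 * f2fs_destroy_segment_manager_caches() at exit.
 */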
4355 int __init f2fs_create_segment_manager_caches(void)
4356 {
4357         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4358                         sizeof(struct discard_entry));
4359         if (!discard_entry_slab)
4360                 goto fail;
4361
4362         discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
4363                         sizeof(struct discard_cmd));
4364         if (!discard_cmd_slab)
4365                 goto destroy_discard_entry;
4366
4367         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4368                         sizeof(struct sit_entry_set));
4369         if (!sit_entry_set_slab)
4370                 goto destroy_discard_cmd;
4371
4372         inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
4373                         sizeof(struct inmem_pages));
4374         if (!inmem_entry_slab)
4375                 goto destroy_sit_entry_set;
4376         return 0;
4377
4378 destroy_sit_entry_set:
4379         kmem_cache_destroy(sit_entry_set_slab);
4380 destroy_discard_cmd:
4381         kmem_cache_destroy(discard_cmd_slab);
4382 destroy_discard_entry:
4383         kmem_cache_destroy(discard_entry_slab);
4384 fail:
4385         return -ENOMEM;
4386 }
4387
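/* Drop the segment manager slab caches at module exit. */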
4388 void f2fs_destroy_segment_manager_caches(void)
4389 {
4390         kmem_cache_destroy(sit_entry_set_slab);
4391         kmem_cache_destroy(discard_cmd_slab);
4392         kmem_cache_destroy(discard_entry_slab);
4393         kmem_cache_destroy(inmem_entry_slab);
4394 }