1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/sched/mm.h>
13 #include <linux/prefetch.h>
14 #include <linux/kthread.h>
15 #include <linux/swap.h>
16 #include <linux/timer.h>
17 #include <linux/freezer.h>
18 #include <linux/sched/signal.h>
19 #include <linux/random.h>
20
21 #include "f2fs.h"
22 #include "segment.h"
23 #include "node.h"
24 #include "gc.h"
25 #include "iostat.h"
26 #include <trace/events/f2fs.h>
27
28 #define __reverse_ffz(x) __reverse_ffs(~(x))
29
30 static struct kmem_cache *discard_entry_slab;
31 static struct kmem_cache *discard_cmd_slab;
32 static struct kmem_cache *sit_entry_set_slab;
33 static struct kmem_cache *revoke_entry_slab;
34
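/*
 * Load the bytes of a bitmap word in big-endian order, so that bitmaps
 * written by f2fs_set_bit() (MSB and LSB reversed within each byte) can
 * be scanned with the ordinary bit helpers below.
 */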
35 static unsigned long __reverse_ulong(unsigned char *str)
36 {
37         unsigned long tmp = 0;
38         int shift = 24, idx = 0;
39
40 #if BITS_PER_LONG == 64
41         shift = 56;
42 #endif
43         while (shift >= 0) {
44                 tmp |= (unsigned long)str[idx++] << shift;
45                 shift -= BITS_PER_BYTE;
46         }
47         return tmp;
48 }
49
50 /*
51  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
52  * MSB and LSB are reversed in a byte by f2fs_set_bit.
53  */
54 static inline unsigned long __reverse_ffs(unsigned long word)
55 {
56         int num = 0;
57
58 #if BITS_PER_LONG == 64
59         if ((word & 0xffffffff00000000UL) == 0)
60                 num += 32;
61         else
62                 word >>= 32;
63 #endif
64         if ((word & 0xffff0000) == 0)
65                 num += 16;
66         else
67                 word >>= 16;
68
69         if ((word & 0xff00) == 0)
70                 num += 8;
71         else
72                 word >>= 8;
73
74         if ((word & 0xf0) == 0)
75                 num += 4;
76         else
77                 word >>= 4;
78
79         if ((word & 0xc) == 0)
80                 num += 2;
81         else
82                 word >>= 2;
83
84         if ((word & 0x2) == 0)
85                 num += 1;
86         return num;
87 }
88
89 /*
90  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
91  * f2fs_set_bit makes MSB and LSB reversed in a byte.
92  * @size must be an integral multiple of BITS_PER_LONG.
93  * Example:
94  *                             MSB <--> LSB
95  *   f2fs_set_bit(0, bitmap) => 1000 0000
96  *   f2fs_set_bit(7, bitmap) => 0000 0001
97  */
98 static unsigned long __find_rev_next_bit(const unsigned long *addr,
99                         unsigned long size, unsigned long offset)
100 {
101         const unsigned long *p = addr + BIT_WORD(offset);
102         unsigned long result = size;
103         unsigned long tmp;
104
105         if (offset >= size)
106                 return size;
107
108         size -= (offset & ~(BITS_PER_LONG - 1));
109         offset %= BITS_PER_LONG;
110
111         while (1) {
112                 if (*p == 0)
113                         goto pass;
114
115                 tmp = __reverse_ulong((unsigned char *)p);
116
117                 tmp &= ~0UL >> offset;
118                 if (size < BITS_PER_LONG)
119                         tmp &= (~0UL << (BITS_PER_LONG - size));
120                 if (tmp)
121                         goto found;
122 pass:
123                 if (size <= BITS_PER_LONG)
124                         break;
125                 size -= BITS_PER_LONG;
126                 offset = 0;
127                 p++;
128         }
129         return result;
130 found:
131         return result - size + __reverse_ffs(tmp);
132 }
133
134 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
135                         unsigned long size, unsigned long offset)
136 {
137         const unsigned long *p = addr + BIT_WORD(offset);
138         unsigned long result = size;
139         unsigned long tmp;
140
141         if (offset >= size)
142                 return size;
143
144         size -= (offset & ~(BITS_PER_LONG - 1));
145         offset %= BITS_PER_LONG;
146
147         while (1) {
148                 if (*p == ~0UL)
149                         goto pass;
150
151                 tmp = __reverse_ulong((unsigned char *)p);
152
153                 if (offset)
154                         tmp |= ~0UL << (BITS_PER_LONG - offset);
155                 if (size < BITS_PER_LONG)
156                         tmp |= ~0UL >> size;
157                 if (tmp != ~0UL)
158                         goto found;
159 pass:
160                 if (size <= BITS_PER_LONG)
161                         break;
162                 size -= BITS_PER_LONG;
163                 offset = 0;
164                 p++;
165         }
166         return result;
167 found:
168         return result - size + __reverse_ffz(tmp);
169 }
170
171 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
172 {
173         int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
174         int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
175         int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
176
177         if (f2fs_lfs_mode(sbi))
178                 return false;
179         if (sbi->gc_mode == GC_URGENT_HIGH)
180                 return true;
181         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
182                 return true;
183
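        /*
         * Roughly: fall back to SSR once the free sections can no longer
         * cover the dirty node/dentry/inode-meta sections plus the SSR and
         * reserved margins.
         */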
184         return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
185                         SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
186 }
187
188 void f2fs_abort_atomic_write(struct inode *inode, bool clean)
189 {
190         struct f2fs_inode_info *fi = F2FS_I(inode);
191
192         if (!f2fs_is_atomic_file(inode))
193                 return;
194
195         if (clean)
196                 truncate_inode_pages_final(inode->i_mapping);
197
198         release_atomic_write_cnt(inode);
199         clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
200         clear_inode_flag(inode, FI_ATOMIC_REPLACE);
201         clear_inode_flag(inode, FI_ATOMIC_FILE);
202         stat_dec_atomic_inode(inode);
203
204         F2FS_I(inode)->atomic_write_task = NULL;
205
206         if (clean) {
207                 f2fs_i_size_write(inode, fi->original_i_size);
208                 fi->original_i_size = 0;
209         }
210         /* avoid stale dirty inode during eviction */
211         sync_inode_metadata(inode, 0);
212 }
213
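/*
 * Replace the block address at @index of @inode with @new_addr.  With
 * @recover set, the previously saved address is put back (invalidating the
 * current block); otherwise the current address is stored in *@old_addr so
 * the caller can revoke the change later.
 */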
214 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
215                         block_t new_addr, block_t *old_addr, bool recover)
216 {
217         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
218         struct dnode_of_data dn;
219         struct node_info ni;
220         int err;
221
222 retry:
223         set_new_dnode(&dn, inode, NULL, NULL, 0);
224         err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
225         if (err) {
226                 if (err == -ENOMEM) {
227                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
228                         goto retry;
229                 }
230                 return err;
231         }
232
233         err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
234         if (err) {
235                 f2fs_put_dnode(&dn);
236                 return err;
237         }
238
239         if (recover) {
240                 /* dn.data_blkaddr is always valid */
241                 if (!__is_valid_data_blkaddr(new_addr)) {
242                         if (new_addr == NULL_ADDR)
243                                 dec_valid_block_count(sbi, inode, 1);
244                         f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
245                         f2fs_update_data_blkaddr(&dn, new_addr);
246                 } else {
247                         f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
248                                 new_addr, ni.version, true, true);
249                 }
250         } else {
251                 blkcnt_t count = 1;
252
253                 err = inc_valid_block_count(sbi, inode, &count, true);
254                 if (err) {
255                         f2fs_put_dnode(&dn);
256                         return err;
257                 }
258
259                 *old_addr = dn.data_blkaddr;
260                 f2fs_truncate_data_blocks_range(&dn, 1);
261                 dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
262
263                 f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
264                                         ni.version, true, false);
265         }
266
267         f2fs_put_dnode(&dn);
268
269         trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
270                         index, old_addr ? *old_addr : 0, new_addr, recover);
271         return 0;
272 }
273
274 static void __complete_revoke_list(struct inode *inode, struct list_head *head,
275                                         bool revoke)
276 {
277         struct revoke_entry *cur, *tmp;
278         pgoff_t start_index = 0;
279         bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
280
281         list_for_each_entry_safe(cur, tmp, head, list) {
282                 if (revoke) {
283                         __replace_atomic_write_block(inode, cur->index,
284                                                 cur->old_addr, NULL, true);
285                 } else if (truncate) {
286                         f2fs_truncate_hole(inode, start_index, cur->index);
287                         start_index = cur->index + 1;
288                 }
289
290                 list_del(&cur->list);
291                 kmem_cache_free(revoke_entry_slab, cur);
292         }
293
294         if (!revoke && truncate)
295                 f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
296 }
297
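/*
 * Walk the data blocks of the COW inode and move every valid block into the
 * original inode via __replace_atomic_write_block(), recording the replaced
 * addresses in a revoke list so a failure can be rolled back afterwards.
 */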
298 static int __f2fs_commit_atomic_write(struct inode *inode)
299 {
300         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
301         struct f2fs_inode_info *fi = F2FS_I(inode);
302         struct inode *cow_inode = fi->cow_inode;
303         struct revoke_entry *new;
304         struct list_head revoke_list;
305         block_t blkaddr;
306         struct dnode_of_data dn;
307         pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
308         pgoff_t off = 0, blen, index;
309         int ret = 0, i;
310
311         INIT_LIST_HEAD(&revoke_list);
312
313         while (len) {
314                 blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
315
316                 set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
317                 ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
318                 if (ret && ret != -ENOENT) {
319                         goto out;
320                 } else if (ret == -ENOENT) {
321                         ret = 0;
322                         if (dn.max_level == 0)
323                                 goto out;
324                         goto next;
325                 }
326
327                 blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
328                                 len);
329                 index = off;
330                 for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
331                         blkaddr = f2fs_data_blkaddr(&dn);
332
333                         if (!__is_valid_data_blkaddr(blkaddr)) {
334                                 continue;
335                         } else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
336                                         DATA_GENERIC_ENHANCE)) {
337                                 f2fs_put_dnode(&dn);
338                                 ret = -EFSCORRUPTED;
339                                 f2fs_handle_error(sbi,
340                                                 ERROR_INVALID_BLKADDR);
341                                 goto out;
342                         }
343
344                         new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
345                                                         true, NULL);
346
347                         ret = __replace_atomic_write_block(inode, index, blkaddr,
348                                                         &new->old_addr, false);
349                         if (ret) {
350                                 f2fs_put_dnode(&dn);
351                                 kmem_cache_free(revoke_entry_slab, new);
352                                 goto out;
353                         }
354
355                         f2fs_update_data_blkaddr(&dn, NULL_ADDR);
356                         new->index = index;
357                         list_add_tail(&new->list, &revoke_list);
358                 }
359                 f2fs_put_dnode(&dn);
360 next:
361                 off += blen;
362                 len -= blen;
363         }
364
365 out:
366         if (ret) {
367                 sbi->revoked_atomic_block += fi->atomic_write_cnt;
368         } else {
369                 sbi->committed_atomic_block += fi->atomic_write_cnt;
370                 set_inode_flag(inode, FI_ATOMIC_COMMITTED);
371         }
372
373         __complete_revoke_list(inode, &revoke_list, ret ? true : false);
374
375         return ret;
376 }
377
378 int f2fs_commit_atomic_write(struct inode *inode)
379 {
380         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
381         struct f2fs_inode_info *fi = F2FS_I(inode);
382         int err;
383
384         err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
385         if (err)
386                 return err;
387
388         f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
389         f2fs_lock_op(sbi);
390
391         err = __f2fs_commit_atomic_write(inode);
392
393         f2fs_unlock_op(sbi);
394         f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
395
396         return err;
397 }
398
399 /*
400  * This function balances dirty node and dentry pages.
401  * In addition, it controls garbage collection.
402  */
403 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
404 {
405         if (time_to_inject(sbi, FAULT_CHECKPOINT))
406                 f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
407
408         /* balance_fs_bg may still be pending */
409         if (need && excess_cached_nats(sbi))
410                 f2fs_balance_fs_bg(sbi, false);
411
412         if (!f2fs_is_checkpoint_ready(sbi))
413                 return;
414
415         /*
416          * We should do GC, or end up with a checkpoint, if there are too many
417          * dirty dir/node pages without enough free segments.
418          */
419         if (has_enough_free_secs(sbi, 0, 0))
420                 return;
421
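        /*
         * With GC_MERGE, hand foreground GC off to the background GC thread
         * and wait for it, instead of running GC in this context.
         */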
422         if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
423                                 sbi->gc_thread->f2fs_gc_task) {
424                 DEFINE_WAIT(wait);
425
426                 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
427                                         TASK_UNINTERRUPTIBLE);
428                 wake_up(&sbi->gc_thread->gc_wait_queue_head);
429                 io_schedule();
430                 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
431         } else {
432                 struct f2fs_gc_control gc_control = {
433                         .victim_segno = NULL_SEGNO,
434                         .init_gc_type = BG_GC,
435                         .no_bg_gc = true,
436                         .should_migrate_blocks = false,
437                         .err_gc_skipped = false,
438                         .nr_free_secs = 1 };
439                 f2fs_down_write(&sbi->gc_lock);
440                 stat_inc_gc_call_count(sbi, FOREGROUND);
441                 f2fs_gc(sbi, &gc_control);
442         }
443 }
444
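/*
 * True when any single class of dirty pages reaches the per-type threshold,
 * or when their sum exceeds 1.5x that threshold.  The threshold is relaxed
 * (factor 3 instead of 2) while cp_rwsem is held.
 */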
445 static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
446 {
447         int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
448         unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
449         unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
450         unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
451         unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
452         unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
453         unsigned int threshold = sbi->blocks_per_seg * factor *
454                                         DEFAULT_DIRTY_THRESHOLD;
455         unsigned int global_threshold = threshold * 3 / 2;
456
457         if (dents >= threshold || qdata >= threshold ||
458                 nodes >= threshold || meta >= threshold ||
459                 imeta >= threshold)
460                 return true;
461         return dents + qdata + nodes + meta + imeta > global_threshold;
462 }
463
464 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
465 {
466         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
467                 return;
468
469         /* try to shrink extent cache when there is not enough memory */
470         if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
471                 f2fs_shrink_read_extent_tree(sbi,
472                                 READ_EXTENT_CACHE_SHRINK_NUMBER);
473
474         /* try to shrink age extent cache when there is not enough memory */
475         if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
476                 f2fs_shrink_age_extent_tree(sbi,
477                                 AGE_EXTENT_CACHE_SHRINK_NUMBER);
478
479         /* check the # of cached NAT entries */
480         if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
481                 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
482
483         if (!f2fs_available_free_memory(sbi, FREE_NIDS))
484                 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
485         else
486                 f2fs_build_free_nids(sbi, false, false);
487
488         if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
489                 excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
490                 goto do_sync;
491
492         /* there is in-flight background IO or a recent foreground operation */
493         if (is_inflight_io(sbi, REQ_TIME) ||
494                 (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
495                 return;
496
497         /* the periodic checkpoint timeout threshold has been exceeded */
498         if (f2fs_time_over(sbi, CP_TIME))
499                 goto do_sync;
500
501         /* checkpoint is the only way to shrink partial cached entries */
502         if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
503                 f2fs_available_free_memory(sbi, INO_ENTRIES))
504                 return;
505
506 do_sync:
507         if (test_opt(sbi, DATA_FLUSH) && from_bg) {
508                 struct blk_plug plug;
509
510                 mutex_lock(&sbi->flush_lock);
511
512                 blk_start_plug(&plug);
513                 f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
514                 blk_finish_plug(&plug);
515
516                 mutex_unlock(&sbi->flush_lock);
517         }
518         stat_inc_cp_call_count(sbi, BACKGROUND);
519         f2fs_sync_fs(sbi->sb, 1);
520 }
521
522 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
523                                 struct block_device *bdev)
524 {
525         int ret = blkdev_issue_flush(bdev);
526
527         trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
528                                 test_opt(sbi, FLUSH_MERGE), ret);
529         if (!ret)
530                 f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
531         return ret;
532 }
533
534 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
535 {
536         int ret = 0;
537         int i;
538
539         if (!f2fs_is_multi_device(sbi))
540                 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
541
542         for (i = 0; i < sbi->s_ndevs; i++) {
543                 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
544                         continue;
545                 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
546                 if (ret)
547                         break;
548         }
549         return ret;
550 }
551
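/*
 * Flush-merge worker: grab every queued flush command, issue one device
 * flush for the whole batch, and hand the result back to all waiters.
 */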
552 static int issue_flush_thread(void *data)
553 {
554         struct f2fs_sb_info *sbi = data;
555         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
556         wait_queue_head_t *q = &fcc->flush_wait_queue;
557 repeat:
558         if (kthread_should_stop())
559                 return 0;
560
561         if (!llist_empty(&fcc->issue_list)) {
562                 struct flush_cmd *cmd, *next;
563                 int ret;
564
565                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
566                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
567
568                 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
569
570                 ret = submit_flush_wait(sbi, cmd->ino);
571                 atomic_inc(&fcc->issued_flush);
572
573                 llist_for_each_entry_safe(cmd, next,
574                                           fcc->dispatch_list, llnode) {
575                         cmd->ret = ret;
576                         complete(&cmd->wait);
577                 }
578                 fcc->dispatch_list = NULL;
579         }
580
581         wait_event_interruptible(*q,
582                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
583         goto repeat;
584 }
585
586 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
587 {
588         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
589         struct flush_cmd cmd;
590         int ret;
591
592         if (test_opt(sbi, NOBARRIER))
593                 return 0;
594
595         if (!test_opt(sbi, FLUSH_MERGE)) {
596                 atomic_inc(&fcc->queued_flush);
597                 ret = submit_flush_wait(sbi, ino);
598                 atomic_dec(&fcc->queued_flush);
599                 atomic_inc(&fcc->issued_flush);
600                 return ret;
601         }
602
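        /*
         * If no other flush is queued (or this is a multi-device setup),
         * issue the flush directly instead of going through the merge thread.
         */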
603         if (atomic_inc_return(&fcc->queued_flush) == 1 ||
604             f2fs_is_multi_device(sbi)) {
605                 ret = submit_flush_wait(sbi, ino);
606                 atomic_dec(&fcc->queued_flush);
607
608                 atomic_inc(&fcc->issued_flush);
609                 return ret;
610         }
611
612         cmd.ino = ino;
613         init_completion(&cmd.wait);
614
615         llist_add(&cmd.llnode, &fcc->issue_list);
616
617         /*
618          * update issue_list before we wake up the issue_flush thread; this
619          * smp_mb() pairs with another barrier in ___wait_event(), see
620          * more details in comments of waitqueue_active().
621          */
622         smp_mb();
623
624         if (waitqueue_active(&fcc->flush_wait_queue))
625                 wake_up(&fcc->flush_wait_queue);
626
627         if (fcc->f2fs_issue_flush) {
628                 wait_for_completion(&cmd.wait);
629                 atomic_dec(&fcc->queued_flush);
630         } else {
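                /*
                 * The flush thread has gone away: drain the issue list
                 * ourselves; if it is already empty, another issuer took our
                 * command, so just wait for its completion.
                 */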
631                 struct llist_node *list;
632
633                 list = llist_del_all(&fcc->issue_list);
634                 if (!list) {
635                         wait_for_completion(&cmd.wait);
636                         atomic_dec(&fcc->queued_flush);
637                 } else {
638                         struct flush_cmd *tmp, *next;
639
640                         ret = submit_flush_wait(sbi, ino);
641
642                         llist_for_each_entry_safe(tmp, next, list, llnode) {
643                                 if (tmp == &cmd) {
644                                         cmd.ret = ret;
645                                         atomic_dec(&fcc->queued_flush);
646                                         continue;
647                                 }
648                                 tmp->ret = ret;
649                                 complete(&tmp->wait);
650                         }
651                 }
652         }
653
654         return cmd.ret;
655 }
656
657 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
658 {
659         dev_t dev = sbi->sb->s_bdev->bd_dev;
660         struct flush_cmd_control *fcc;
661
662         if (SM_I(sbi)->fcc_info) {
663                 fcc = SM_I(sbi)->fcc_info;
664                 if (fcc->f2fs_issue_flush)
665                         return 0;
666                 goto init_thread;
667         }
668
669         fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
670         if (!fcc)
671                 return -ENOMEM;
672         atomic_set(&fcc->issued_flush, 0);
673         atomic_set(&fcc->queued_flush, 0);
674         init_waitqueue_head(&fcc->flush_wait_queue);
675         init_llist_head(&fcc->issue_list);
676         SM_I(sbi)->fcc_info = fcc;
677         if (!test_opt(sbi, FLUSH_MERGE))
678                 return 0;
679
680 init_thread:
681         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
682                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
683         if (IS_ERR(fcc->f2fs_issue_flush)) {
684                 int err = PTR_ERR(fcc->f2fs_issue_flush);
685
686                 fcc->f2fs_issue_flush = NULL;
687                 return err;
688         }
689
690         return 0;
691 }
692
693 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
694 {
695         struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
696
697         if (fcc && fcc->f2fs_issue_flush) {
698                 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
699
700                 fcc->f2fs_issue_flush = NULL;
701                 kthread_stop(flush_thread);
702         }
703         if (free) {
704                 kfree(fcc);
705                 SM_I(sbi)->fcc_info = NULL;
706         }
707 }
708
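/*
 * Flush the write cache of every dirty extra device, retrying each one up to
 * DEFAULT_RETRY_IO_COUNT times; a persistent failure stops checkpointing.
 */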
709 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
710 {
711         int ret = 0, i;
712
713         if (!f2fs_is_multi_device(sbi))
714                 return 0;
715
716         if (test_opt(sbi, NOBARRIER))
717                 return 0;
718
719         for (i = 1; i < sbi->s_ndevs; i++) {
720                 int count = DEFAULT_RETRY_IO_COUNT;
721
722                 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
723                         continue;
724
725                 do {
726                         ret = __submit_flush_wait(sbi, FDEV(i).bdev);
727                         if (ret)
728                                 f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
729                 } while (ret && --count);
730
731                 if (ret) {
732                         f2fs_stop_checkpoint(sbi, false,
733                                         STOP_CP_REASON_FLUSH_FAIL);
734                         break;
735                 }
736
737                 spin_lock(&sbi->dev_lock);
738                 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
739                 spin_unlock(&sbi->dev_lock);
740         }
741
742         return ret;
743 }
744
745 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
746                 enum dirty_type dirty_type)
747 {
748         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
749
750         /* need not be added */
751         if (IS_CURSEG(sbi, segno))
752                 return;
753
754         if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
755                 dirty_i->nr_dirty[dirty_type]++;
756
757         if (dirty_type == DIRTY) {
758                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
759                 enum dirty_type t = sentry->type;
760
761                 if (unlikely(t >= DIRTY)) {
762                         f2fs_bug_on(sbi, 1);
763                         return;
764                 }
765                 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
766                         dirty_i->nr_dirty[t]++;
767
768                 if (__is_large_section(sbi)) {
769                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
770                         block_t valid_blocks =
771                                 get_valid_blocks(sbi, segno, true);
772
773                         f2fs_bug_on(sbi, unlikely(!valid_blocks ||
774                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)));
775
776                         if (!IS_CURSEC(sbi, secno))
777                                 set_bit(secno, dirty_i->dirty_secmap);
778                 }
779         }
780 }
781
782 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
783                 enum dirty_type dirty_type)
784 {
785         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
786         block_t valid_blocks;
787
788         if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
789                 dirty_i->nr_dirty[dirty_type]--;
790
791         if (dirty_type == DIRTY) {
792                 struct seg_entry *sentry = get_seg_entry(sbi, segno);
793                 enum dirty_type t = sentry->type;
794
795                 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
796                         dirty_i->nr_dirty[t]--;
797
798                 valid_blocks = get_valid_blocks(sbi, segno, true);
799                 if (valid_blocks == 0) {
800                         clear_bit(GET_SEC_FROM_SEG(sbi, segno),
801                                                 dirty_i->victim_secmap);
802 #ifdef CONFIG_F2FS_CHECK_FS
803                         clear_bit(segno, SIT_I(sbi)->invalid_segmap);
804 #endif
805                 }
806                 if (__is_large_section(sbi)) {
807                         unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
808
809                         if (!valid_blocks ||
810                                         valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
811                                 clear_bit(secno, dirty_i->dirty_secmap);
812                                 return;
813                         }
814
815                         if (!IS_CURSEC(sbi, secno))
816                                 set_bit(secno, dirty_i->dirty_secmap);
817                 }
818         }
819 }
820
821 /*
822  * This should not fail with an error such as -ENOMEM.
823  * Adding a dirty entry into the seglist is not a critical operation.
824  * If a given segment is one of the current working segments, it won't be added.
825  */
826 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
827 {
828         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
829         unsigned short valid_blocks, ckpt_valid_blocks;
830         unsigned int usable_blocks;
831
832         if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
833                 return;
834
835         usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
836         mutex_lock(&dirty_i->seglist_lock);
837
838         valid_blocks = get_valid_blocks(sbi, segno, false);
839         ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
840
841         if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
842                 ckpt_valid_blocks == usable_blocks)) {
843                 __locate_dirty_segment(sbi, segno, PRE);
844                 __remove_dirty_segment(sbi, segno, DIRTY);
845         } else if (valid_blocks < usable_blocks) {
846                 __locate_dirty_segment(sbi, segno, DIRTY);
847         } else {
848                 /* Recovery routine with SSR needs this */
849                 __remove_dirty_segment(sbi, segno, DIRTY);
850         }
851
852         mutex_unlock(&dirty_i->seglist_lock);
853 }
854
855 /* This moves currently empty dirty segments to prefree. Must hold seglist_lock. */
856 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
857 {
858         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
859         unsigned int segno;
860
861         mutex_lock(&dirty_i->seglist_lock);
862         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
863                 if (get_valid_blocks(sbi, segno, false))
864                         continue;
865                 if (IS_CURSEG(sbi, segno))
866                         continue;
867                 __locate_dirty_segment(sbi, segno, PRE);
868                 __remove_dirty_segment(sbi, segno, DIRTY);
869         }
870         mutex_unlock(&dirty_i->seglist_lock);
871 }
872
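/*
 * Estimate how many blocks cannot be reused without a checkpoint: the larger
 * of the DATA and NODE holes in dirty segments, minus whatever the
 * overprovision area can absorb.
 */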
873 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
874 {
875         int ovp_hole_segs =
876                 (overprovision_segments(sbi) - reserved_segments(sbi));
877         block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
878         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
879         block_t holes[2] = {0, 0};      /* DATA and NODE */
880         block_t unusable;
881         struct seg_entry *se;
882         unsigned int segno;
883
884         mutex_lock(&dirty_i->seglist_lock);
885         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
886                 se = get_seg_entry(sbi, segno);
887                 if (IS_NODESEG(se->type))
888                         holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
889                                                         se->valid_blocks;
890                 else
891                         holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
892                                                         se->valid_blocks;
893         }
894         mutex_unlock(&dirty_i->seglist_lock);
895
896         unusable = max(holes[DATA], holes[NODE]);
897         if (unusable > ovp_holes)
898                 return unusable - ovp_holes;
899         return 0;
900 }
901
902 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
903 {
904         int ovp_hole_segs =
905                 (overprovision_segments(sbi) - reserved_segments(sbi));
906         if (unusable > F2FS_OPTION(sbi).unusable_cap)
907                 return -EAGAIN;
908         if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
909                 dirty_segments(sbi) > ovp_hole_segs)
910                 return -EAGAIN;
911         return 0;
912 }
913
914 /* This is only used by SBI_CP_DISABLED */
915 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
916 {
917         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
918         unsigned int segno = 0;
919
920         mutex_lock(&dirty_i->seglist_lock);
921         for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
922                 if (get_valid_blocks(sbi, segno, false))
923                         continue;
924                 if (get_ckpt_valid_blocks(sbi, segno, false))
925                         continue;
926                 mutex_unlock(&dirty_i->seglist_lock);
927                 return segno;
928         }
929         mutex_unlock(&dirty_i->seglist_lock);
930         return NULL_SEGNO;
931 }
932
933 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
934                 struct block_device *bdev, block_t lstart,
935                 block_t start, block_t len)
936 {
937         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
938         struct list_head *pend_list;
939         struct discard_cmd *dc;
940
941         f2fs_bug_on(sbi, !len);
942
943         pend_list = &dcc->pend_list[plist_idx(len)];
944
945         dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
946         INIT_LIST_HEAD(&dc->list);
947         dc->bdev = bdev;
948         dc->di.lstart = lstart;
949         dc->di.start = start;
950         dc->di.len = len;
951         dc->ref = 0;
952         dc->state = D_PREP;
953         dc->queued = 0;
954         dc->error = 0;
955         init_completion(&dc->wait);
956         list_add_tail(&dc->list, pend_list);
957         spin_lock_init(&dc->lock);
958         dc->bio_ref = 0;
959         atomic_inc(&dcc->discard_cmd_cnt);
960         dcc->undiscard_blks += len;
961
962         return dc;
963 }
964
965 static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
966 {
967 #ifdef CONFIG_F2FS_CHECK_FS
968         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
969         struct rb_node *cur = rb_first_cached(&dcc->root), *next;
970         struct discard_cmd *cur_dc, *next_dc;
971
972         while (cur) {
973                 next = rb_next(cur);
974                 if (!next)
975                         return true;
976
977                 cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
978                 next_dc = rb_entry(next, struct discard_cmd, rb_node);
979
980                 if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
981                         f2fs_info(sbi, "broken discard_rbtree, "
982                                 "cur(%u, %u) next(%u, %u)",
983                                 cur_dc->di.lstart, cur_dc->di.len,
984                                 next_dc->di.lstart, next_dc->di.len);
985                         return false;
986                 }
987                 cur = next;
988         }
989 #endif
990         return true;
991 }
992
993 static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
994                                                 block_t blkaddr)
995 {
996         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
997         struct rb_node *node = dcc->root.rb_root.rb_node;
998         struct discard_cmd *dc;
999
1000         while (node) {
1001                 dc = rb_entry(node, struct discard_cmd, rb_node);
1002
1003                 if (blkaddr < dc->di.lstart)
1004                         node = node->rb_left;
1005                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1006                         node = node->rb_right;
1007                 else
1008                         return dc;
1009         }
1010         return NULL;
1011 }
1012
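/*
 * Like __lookup_discard_cmd(), but also report the neighbouring commands and
 * the rb-tree insertion point so the caller can merge with or insert around
 * @blkaddr.
 */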
1013 static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
1014                                 block_t blkaddr,
1015                                 struct discard_cmd **prev_entry,
1016                                 struct discard_cmd **next_entry,
1017                                 struct rb_node ***insert_p,
1018                                 struct rb_node **insert_parent)
1019 {
1020         struct rb_node **pnode = &root->rb_root.rb_node;
1021         struct rb_node *parent = NULL, *tmp_node;
1022         struct discard_cmd *dc;
1023
1024         *insert_p = NULL;
1025         *insert_parent = NULL;
1026         *prev_entry = NULL;
1027         *next_entry = NULL;
1028
1029         if (RB_EMPTY_ROOT(&root->rb_root))
1030                 return NULL;
1031
1032         while (*pnode) {
1033                 parent = *pnode;
1034                 dc = rb_entry(*pnode, struct discard_cmd, rb_node);
1035
1036                 if (blkaddr < dc->di.lstart)
1037                         pnode = &(*pnode)->rb_left;
1038                 else if (blkaddr >= dc->di.lstart + dc->di.len)
1039                         pnode = &(*pnode)->rb_right;
1040                 else
1041                         goto lookup_neighbors;
1042         }
1043
1044         *insert_p = pnode;
1045         *insert_parent = parent;
1046
1047         dc = rb_entry(parent, struct discard_cmd, rb_node);
1048         tmp_node = parent;
1049         if (parent && blkaddr > dc->di.lstart)
1050                 tmp_node = rb_next(parent);
1051         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1052
1053         tmp_node = parent;
1054         if (parent && blkaddr < dc->di.lstart)
1055                 tmp_node = rb_prev(parent);
1056         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1057         return NULL;
1058
1059 lookup_neighbors:
1060         /* lookup prev node for merging backward later */
1061         tmp_node = rb_prev(&dc->rb_node);
1062         *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1063
1064         /* lookup next node for merging frontward later */
1065         tmp_node = rb_next(&dc->rb_node);
1066         *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
1067         return dc;
1068 }
1069
1070 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1071                                                         struct discard_cmd *dc)
1072 {
1073         if (dc->state == D_DONE)
1074                 atomic_sub(dc->queued, &dcc->queued_discard);
1075
1076         list_del(&dc->list);
1077         rb_erase_cached(&dc->rb_node, &dcc->root);
1078         dcc->undiscard_blks -= dc->di.len;
1079
1080         kmem_cache_free(discard_cmd_slab, dc);
1081
1082         atomic_dec(&dcc->discard_cmd_cnt);
1083 }
1084
1085 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1086                                                         struct discard_cmd *dc)
1087 {
1088         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1089         unsigned long flags;
1090
1091         trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
1092
1093         spin_lock_irqsave(&dc->lock, flags);
1094         if (dc->bio_ref) {
1095                 spin_unlock_irqrestore(&dc->lock, flags);
1096                 return;
1097         }
1098         spin_unlock_irqrestore(&dc->lock, flags);
1099
1100         f2fs_bug_on(sbi, dc->ref);
1101
1102         if (dc->error == -EOPNOTSUPP)
1103                 dc->error = 0;
1104
1105         if (dc->error)
1106                 printk_ratelimited(
1107                         "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1108                         KERN_INFO, sbi->sb->s_id,
1109                         dc->di.lstart, dc->di.start, dc->di.len, dc->error);
1110         __detach_discard_cmd(dcc, dc);
1111 }
1112
1113 static void f2fs_submit_discard_endio(struct bio *bio)
1114 {
1115         struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1116         unsigned long flags;
1117
1118         spin_lock_irqsave(&dc->lock, flags);
1119         if (!dc->error)
1120                 dc->error = blk_status_to_errno(bio->bi_status);
1121         dc->bio_ref--;
1122         if (!dc->bio_ref && dc->state == D_SUBMIT) {
1123                 dc->state = D_DONE;
1124                 complete_all(&dc->wait);
1125         }
1126         spin_unlock_irqrestore(&dc->lock, flags);
1127         bio_put(bio);
1128 }
1129
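/*
 * Debug-only check (CONFIG_F2FS_CHECK_FS): verify that no block in
 * [start, end) is still marked valid in the SIT bitmaps before it gets
 * discarded.
 */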
1130 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1131                                 block_t start, block_t end)
1132 {
1133 #ifdef CONFIG_F2FS_CHECK_FS
1134         struct seg_entry *sentry;
1135         unsigned int segno;
1136         block_t blk = start;
1137         unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1138         unsigned long *map;
1139
1140         while (blk < end) {
1141                 segno = GET_SEGNO(sbi, blk);
1142                 sentry = get_seg_entry(sbi, segno);
1143                 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1144
1145                 if (end < START_BLOCK(sbi, segno + 1))
1146                         size = GET_BLKOFF_FROM_SEG0(sbi, end);
1147                 else
1148                         size = max_blocks;
1149                 map = (unsigned long *)(sentry->cur_valid_map);
1150                 offset = __find_rev_next_bit(map, size, offset);
1151                 f2fs_bug_on(sbi, offset != size);
1152                 blk = START_BLOCK(sbi, segno + 1);
1153         }
1154 #endif
1155 }
1156
1157 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1158                                 struct discard_policy *dpolicy,
1159                                 int discard_type, unsigned int granularity)
1160 {
1161         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1162
1163         /* common policy */
1164         dpolicy->type = discard_type;
1165         dpolicy->sync = true;
1166         dpolicy->ordered = false;
1167         dpolicy->granularity = granularity;
1168
1169         dpolicy->max_requests = dcc->max_discard_request;
1170         dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
1171         dpolicy->timeout = false;
1172
1173         if (discard_type == DPOLICY_BG) {
1174                 dpolicy->min_interval = dcc->min_discard_issue_time;
1175                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1176                 dpolicy->max_interval = dcc->max_discard_issue_time;
1177                 if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
1178                         dpolicy->io_aware = true;
1179                 else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
1180                         dpolicy->io_aware = false;
1181                 dpolicy->sync = false;
1182                 dpolicy->ordered = true;
1183                 if (utilization(sbi) > dcc->discard_urgent_util) {
1184                         dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1185                         if (atomic_read(&dcc->discard_cmd_cnt))
1186                                 dpolicy->max_interval =
1187                                         dcc->min_discard_issue_time;
1188                 }
1189         } else if (discard_type == DPOLICY_FORCE) {
1190                 dpolicy->min_interval = dcc->min_discard_issue_time;
1191                 dpolicy->mid_interval = dcc->mid_discard_issue_time;
1192                 dpolicy->max_interval = dcc->max_discard_issue_time;
1193                 dpolicy->io_aware = false;
1194         } else if (discard_type == DPOLICY_FSTRIM) {
1195                 dpolicy->io_aware = false;
1196         } else if (discard_type == DPOLICY_UMOUNT) {
1197                 dpolicy->io_aware = false;
1198                 /* we need to issue all discards to keep CP_TRIMMED_FLAG */
1199                 dpolicy->granularity = MIN_DISCARD_GRANULARITY;
1200                 dpolicy->timeout = true;
1201         }
1202 }
1203
1204 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1205                                 struct block_device *bdev, block_t lstart,
1206                                 block_t start, block_t len);
1207
1208 #ifdef CONFIG_BLK_DEV_ZONED
1209 static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
1210                                    struct discard_cmd *dc, blk_opf_t flag,
1211                                    struct list_head *wait_list,
1212                                    unsigned int *issued)
1213 {
1214         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1215         struct block_device *bdev = dc->bdev;
1216         struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
1217         unsigned long flags;
1218
1219         trace_f2fs_issue_reset_zone(bdev, dc->di.start);
1220
1221         spin_lock_irqsave(&dc->lock, flags);
1222         dc->state = D_SUBMIT;
1223         dc->bio_ref++;
1224         spin_unlock_irqrestore(&dc->lock, flags);
1225
1226         if (issued)
1227                 (*issued)++;
1228
1229         atomic_inc(&dcc->queued_discard);
1230         dc->queued++;
1231         list_move_tail(&dc->list, wait_list);
1232
1233         /* sanity check on discard range */
1234         __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);
1235
1236         bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
1237         bio->bi_private = dc;
1238         bio->bi_end_io = f2fs_submit_discard_endio;
1239         submit_bio(bio);
1240
1241         atomic_inc(&dcc->issued_discard);
1242         f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
1243 }
1244 #endif
1245
1246 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1247 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1248                                 struct discard_policy *dpolicy,
1249                                 struct discard_cmd *dc, int *issued)
1250 {
1251         struct block_device *bdev = dc->bdev;
1252         unsigned int max_discard_blocks =
1253                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1254         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1255         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1256                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1257         blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
1258         block_t lstart, start, len, total_len;
1259         int err = 0;
1260
1261         if (dc->state != D_PREP)
1262                 return 0;
1263
1264         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1265                 return 0;
1266
1267 #ifdef CONFIG_BLK_DEV_ZONED
1268         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
1269                 int devi = f2fs_bdev_index(sbi, bdev);
1270
1271                 if (devi < 0)
1272                         return -EINVAL;
1273
1274                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1275                         __submit_zone_reset_cmd(sbi, dc, flag,
1276                                                 wait_list, issued);
1277                         return 0;
1278                 }
1279         }
1280 #endif
1281
1282         trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
1283
1284         lstart = dc->di.lstart;
1285         start = dc->di.start;
1286         len = dc->di.len;
1287         total_len = len;
1288
1289         dc->di.len = 0;
1290
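        /*
         * issue the range in chunks of at most max_discard_blocks each,
         * bounded by the policy's max_requests
         */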
1291         while (total_len && *issued < dpolicy->max_requests && !err) {
1292                 struct bio *bio = NULL;
1293                 unsigned long flags;
1294                 bool last = true;
1295
1296                 if (len > max_discard_blocks) {
1297                         len = max_discard_blocks;
1298                         last = false;
1299                 }
1300
1301                 (*issued)++;
1302                 if (*issued == dpolicy->max_requests)
1303                         last = true;
1304
1305                 dc->di.len += len;
1306
1307                 if (time_to_inject(sbi, FAULT_DISCARD)) {
1308                         err = -EIO;
1309                 } else {
1310                         err = __blkdev_issue_discard(bdev,
1311                                         SECTOR_FROM_BLOCK(start),
1312                                         SECTOR_FROM_BLOCK(len),
1313                                         GFP_NOFS, &bio);
1314                 }
1315                 if (err) {
1316                         spin_lock_irqsave(&dc->lock, flags);
1317                         if (dc->state == D_PARTIAL)
1318                                 dc->state = D_SUBMIT;
1319                         spin_unlock_irqrestore(&dc->lock, flags);
1320
1321                         break;
1322                 }
1323
1324                 f2fs_bug_on(sbi, !bio);
1325
1326                 /*
1327                  * this must be set before submission so the endio does
1328                  * not mark the command D_DONE right away
1329                  */
1330                 spin_lock_irqsave(&dc->lock, flags);
1331                 if (last)
1332                         dc->state = D_SUBMIT;
1333                 else
1334                         dc->state = D_PARTIAL;
1335                 dc->bio_ref++;
1336                 spin_unlock_irqrestore(&dc->lock, flags);
1337
1338                 atomic_inc(&dcc->queued_discard);
1339                 dc->queued++;
1340                 list_move_tail(&dc->list, wait_list);
1341
1342                 /* sanity check on discard range */
1343                 __check_sit_bitmap(sbi, lstart, lstart + len);
1344
1345                 bio->bi_private = dc;
1346                 bio->bi_end_io = f2fs_submit_discard_endio;
1347                 bio->bi_opf |= flag;
1348                 submit_bio(bio);
1349
1350                 atomic_inc(&dcc->issued_discard);
1351
1352                 f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);
1353
1354                 lstart += len;
1355                 start += len;
1356                 total_len -= len;
1357                 len = total_len;
1358         }
1359
1360         if (!err && len) {
1361                 dcc->undiscard_blks -= len;
1362                 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1363         }
1364         return err;
1365 }
1366
1367 static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
1368                                 struct block_device *bdev, block_t lstart,
1369                                 block_t start, block_t len)
1370 {
1371         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1372         struct rb_node **p = &dcc->root.rb_root.rb_node;
1373         struct rb_node *parent = NULL;
1374         struct discard_cmd *dc;
1375         bool leftmost = true;
1376
1377         /* look up rb tree to find parent node */
1378         while (*p) {
1379                 parent = *p;
1380                 dc = rb_entry(parent, struct discard_cmd, rb_node);
1381
1382                 if (lstart < dc->di.lstart) {
1383                         p = &(*p)->rb_left;
1384                 } else if (lstart >= dc->di.lstart + dc->di.len) {
1385                         p = &(*p)->rb_right;
1386                         leftmost = false;
1387                 } else {
1388                         /* skip adding it if it already exists */
1389                         return;
1390                 }
1391         }
1392
1393         dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1394
1395         rb_link_node(&dc->rb_node, parent, p);
1396         rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1397 }
1398
1399 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1400                                                 struct discard_cmd *dc)
1401 {
1402         list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
1403 }
1404
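/*
 * Punch @blkaddr out of a pending discard command: drop the command if it is
 * already done or only one block long, otherwise shrink it around the block
 * and, when the block lies in the middle, insert a new command for the tail.
 */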
1405 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1406                                 struct discard_cmd *dc, block_t blkaddr)
1407 {
1408         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1409         struct discard_info di = dc->di;
1410         bool modified = false;
1411
1412         if (dc->state == D_DONE || dc->di.len == 1) {
1413                 __remove_discard_cmd(sbi, dc);
1414                 return;
1415         }
1416
1417         dcc->undiscard_blks -= di.len;
1418
1419         if (blkaddr > di.lstart) {
1420                 dc->di.len = blkaddr - dc->di.lstart;
1421                 dcc->undiscard_blks += dc->di.len;
1422                 __relocate_discard_cmd(dcc, dc);
1423                 modified = true;
1424         }
1425
1426         if (blkaddr < di.lstart + di.len - 1) {
1427                 if (modified) {
1428                         __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
1429                                         di.start + blkaddr + 1 - di.lstart,
1430                                         di.lstart + di.len - 1 - blkaddr);
1431                 } else {
1432                         dc->di.lstart++;
1433                         dc->di.len--;
1434                         dc->di.start++;
1435                         dcc->undiscard_blks += dc->di.len;
1436                         __relocate_discard_cmd(dcc, dc);
1437                 }
1438         }
1439 }
1440
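/*
 * Insert the range [lstart, lstart + len) into the discard rb-tree, merging
 * it with adjacent D_PREP commands on the same device where possible and
 * creating new commands for any uncovered gaps.
 */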
1441 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1442                                 struct block_device *bdev, block_t lstart,
1443                                 block_t start, block_t len)
1444 {
1445         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1446         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1447         struct discard_cmd *dc;
1448         struct discard_info di = {0};
1449         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1450         unsigned int max_discard_blocks =
1451                         SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
1452         block_t end = lstart + len;
1453
1454         dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
1455                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1456         if (dc)
1457                 prev_dc = dc;
1458
1459         if (!prev_dc) {
1460                 di.lstart = lstart;
1461                 di.len = next_dc ? next_dc->di.lstart - lstart : len;
1462                 di.len = min(di.len, len);
1463                 di.start = start;
1464         }
1465
1466         while (1) {
1467                 struct rb_node *node;
1468                 bool merged = false;
1469                 struct discard_cmd *tdc = NULL;
1470
1471                 if (prev_dc) {
1472                         di.lstart = prev_dc->di.lstart + prev_dc->di.len;
1473                         if (di.lstart < lstart)
1474                                 di.lstart = lstart;
1475                         if (di.lstart >= end)
1476                                 break;
1477
1478                         if (!next_dc || next_dc->di.lstart > end)
1479                                 di.len = end - di.lstart;
1480                         else
1481                                 di.len = next_dc->di.lstart - di.lstart;
1482                         di.start = start + di.lstart - lstart;
1483                 }
1484
1485                 if (!di.len)
1486                         goto next;
1487
1488                 if (prev_dc && prev_dc->state == D_PREP &&
1489                         prev_dc->bdev == bdev &&
1490                         __is_discard_back_mergeable(&di, &prev_dc->di,
1491                                                         max_discard_blocks)) {
1492                         prev_dc->di.len += di.len;
1493                         dcc->undiscard_blks += di.len;
1494                         __relocate_discard_cmd(dcc, prev_dc);
1495                         di = prev_dc->di;
1496                         tdc = prev_dc;
1497                         merged = true;
1498                 }
1499
1500                 if (next_dc && next_dc->state == D_PREP &&
1501                         next_dc->bdev == bdev &&
1502                         __is_discard_front_mergeable(&di, &next_dc->di,
1503                                                         max_discard_blocks)) {
1504                         next_dc->di.lstart = di.lstart;
1505                         next_dc->di.len += di.len;
1506                         next_dc->di.start = di.start;
1507                         dcc->undiscard_blks += di.len;
1508                         __relocate_discard_cmd(dcc, next_dc);
1509                         if (tdc)
1510                                 __remove_discard_cmd(sbi, tdc);
1511                         merged = true;
1512                 }
1513
1514                 if (!merged)
1515                         __insert_discard_cmd(sbi, bdev,
1516                                                 di.lstart, di.start, di.len);
1517  next:
1518                 prev_dc = next_dc;
1519                 if (!prev_dc)
1520                         break;
1521
1522                 node = rb_next(&prev_dc->rb_node);
1523                 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1524         }
1525 }
1526
1527 #ifdef CONFIG_BLK_DEV_ZONED
1528 static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
1529                 struct block_device *bdev, block_t blkstart, block_t lblkstart,
1530                 block_t blklen)
1531 {
1532         trace_f2fs_queue_reset_zone(bdev, blkstart);
1533
1534         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1535         __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
1536         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1537 }
1538 #endif
1539
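/*
 * Queue a discard for later issuing: translate the logical start address to
 * a per-device block address and add the range to the discard rb-tree.
 */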
1540 static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
1541                 struct block_device *bdev, block_t blkstart, block_t blklen)
1542 {
1543         block_t lblkstart = blkstart;
1544
1545         if (!f2fs_bdev_support_discard(bdev))
1546                 return;
1547
1548         trace_f2fs_queue_discard(bdev, blkstart, blklen);
1549
1550         if (f2fs_is_multi_device(sbi)) {
1551                 int devi = f2fs_target_device_index(sbi, blkstart);
1552
1553                 blkstart -= FDEV(devi).start_blk;
1554         }
1555         mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1556         __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1557         mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1558 }
1559
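/*
 * Issue pending discard commands in LBA order, resuming from dcc->next_pos
 * and stopping once the I/O-aware policy or the request limit kicks in.
 */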
1560 static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1561                 struct discard_policy *dpolicy, int *issued)
1562 {
1563         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1564         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1565         struct rb_node **insert_p = NULL, *insert_parent = NULL;
1566         struct discard_cmd *dc;
1567         struct blk_plug plug;
1568         bool io_interrupted = false;
1569
1570         mutex_lock(&dcc->cmd_lock);
1571         dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1572                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
1573         if (!dc)
1574                 dc = next_dc;
1575
1576         blk_start_plug(&plug);
1577
1578         while (dc) {
1579                 struct rb_node *node;
1580                 int err = 0;
1581
1582                 if (dc->state != D_PREP)
1583                         goto next;
1584
1585                 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1586                         io_interrupted = true;
1587                         break;
1588                 }
1589
1590                 dcc->next_pos = dc->di.lstart + dc->di.len;
1591                 err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1592
1593                 if (*issued >= dpolicy->max_requests)
1594                         break;
1595 next:
1596                 node = rb_next(&dc->rb_node);
1597                 if (err)
1598                         __remove_discard_cmd(sbi, dc);
1599                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1600         }
1601
1602         blk_finish_plug(&plug);
1603
1604         if (!dc)
1605                 dcc->next_pos = 0;
1606
1607         mutex_unlock(&dcc->cmd_lock);
1608
1609         if (!(*issued) && io_interrupted)
1610                 *issued = -1;
1611 }
1612 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1613                                         struct discard_policy *dpolicy);
1614
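/*
 * Walk the pending lists from the largest granularity downwards and issue
 * prepared discard commands according to @dpolicy. Returns the number of
 * commands issued, or -1 if nothing was issued because I/O was busy.
 */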
1615 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1616                                         struct discard_policy *dpolicy)
1617 {
1618         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1619         struct list_head *pend_list;
1620         struct discard_cmd *dc, *tmp;
1621         struct blk_plug plug;
1622         int i, issued;
1623         bool io_interrupted = false;
1624
1625         if (dpolicy->timeout)
1626                 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1627
1628 retry:
1629         issued = 0;
1630         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1631                 if (dpolicy->timeout &&
1632                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1633                         break;
1634
1635                 if (i + 1 < dpolicy->granularity)
1636                         break;
1637
1638                 if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1639                         __issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1640                         return issued;
1641                 }
1642
1643                 pend_list = &dcc->pend_list[i];
1644
1645                 mutex_lock(&dcc->cmd_lock);
1646                 if (list_empty(pend_list))
1647                         goto next;
1648                 if (unlikely(dcc->rbtree_check))
1649                         f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1650                 blk_start_plug(&plug);
1651                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1652                         f2fs_bug_on(sbi, dc->state != D_PREP);
1653
1654                         if (dpolicy->timeout &&
1655                                 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1656                                 break;
1657
1658                         if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1659                                                 !is_idle(sbi, DISCARD_TIME)) {
1660                                 io_interrupted = true;
1661                                 break;
1662                         }
1663
1664                         __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1665
1666                         if (issued >= dpolicy->max_requests)
1667                                 break;
1668                 }
1669                 blk_finish_plug(&plug);
1670 next:
1671                 mutex_unlock(&dcc->cmd_lock);
1672
1673                 if (issued >= dpolicy->max_requests || io_interrupted)
1674                         break;
1675         }
1676
1677         if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1678                 __wait_all_discard_cmd(sbi, dpolicy);
1679                 goto retry;
1680         }
1681
1682         if (!issued && io_interrupted)
1683                 issued = -1;
1684
1685         return issued;
1686 }
1687
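/*
 * Drop every still-pending (D_PREP) discard command without issuing it and
 * report whether anything was dropped.
 */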
1688 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1689 {
1690         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1691         struct list_head *pend_list;
1692         struct discard_cmd *dc, *tmp;
1693         int i;
1694         bool dropped = false;
1695
1696         mutex_lock(&dcc->cmd_lock);
1697         for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1698                 pend_list = &dcc->pend_list[i];
1699                 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1700                         f2fs_bug_on(sbi, dc->state != D_PREP);
1701                         __remove_discard_cmd(sbi, dc);
1702                         dropped = true;
1703                 }
1704         }
1705         mutex_unlock(&dcc->cmd_lock);
1706
1707         return dropped;
1708 }
1709
1710 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1711 {
1712         __drop_discard_cmd(sbi);
1713 }
1714
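/*
 * Wait for one in-flight discard to complete and drop our reference; on the
 * last reference the command is removed. Returns the discarded length, or 0
 * on error or while other references remain.
 */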
1715 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1716                                                         struct discard_cmd *dc)
1717 {
1718         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1719         unsigned int len = 0;
1720
1721         wait_for_completion_io(&dc->wait);
1722         mutex_lock(&dcc->cmd_lock);
1723         f2fs_bug_on(sbi, dc->state != D_DONE);
1724         dc->ref--;
1725         if (!dc->ref) {
1726                 if (!dc->error)
1727                         len = dc->di.len;
1728                 __remove_discard_cmd(sbi, dc);
1729         }
1730         mutex_unlock(&dcc->cmd_lock);
1731
1732         return len;
1733 }
1734
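/*
 * Wait for completed discard commands overlapping [start, end) that match
 * the policy granularity, removing them and returning the total number of
 * blocks trimmed.
 */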
1735 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1736                                                 struct discard_policy *dpolicy,
1737                                                 block_t start, block_t end)
1738 {
1739         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1740         struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1741                                         &(dcc->fstrim_list) : &(dcc->wait_list);
1742         struct discard_cmd *dc = NULL, *iter, *tmp;
1743         unsigned int trimmed = 0;
1744
1745 next:
1746         dc = NULL;
1747
1748         mutex_lock(&dcc->cmd_lock);
1749         list_for_each_entry_safe(iter, tmp, wait_list, list) {
1750                 if (iter->di.lstart + iter->di.len <= start ||
1751                                         end <= iter->di.lstart)
1752                         continue;
1753                 if (iter->di.len < dpolicy->granularity)
1754                         continue;
1755                 if (iter->state == D_DONE && !iter->ref) {
1756                         wait_for_completion_io(&iter->wait);
1757                         if (!iter->error)
1758                                 trimmed += iter->di.len;
1759                         __remove_discard_cmd(sbi, iter);
1760                 } else {
1761                         iter->ref++;
1762                         dc = iter;
1763                         break;
1764                 }
1765         }
1766         mutex_unlock(&dcc->cmd_lock);
1767
1768         if (dc) {
1769                 trimmed += __wait_one_discard_bio(sbi, dc);
1770                 goto next;
1771         }
1772
1773         return trimmed;
1774 }
1775
1776 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1777                                                 struct discard_policy *dpolicy)
1778 {
1779         struct discard_policy dp;
1780         unsigned int discard_blks;
1781
1782         if (dpolicy)
1783                 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1784
1785         /* wait all */
1786         __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1787         discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1788         __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1789         discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1790
1791         return discard_blks;
1792 }
1793
1794 /* This should be covered by global mutex, &sit_i->sentry_lock */
1795 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1796 {
1797         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1798         struct discard_cmd *dc;
1799         bool need_wait = false;
1800
1801         mutex_lock(&dcc->cmd_lock);
1802         dc = __lookup_discard_cmd(sbi, blkaddr);
1803 #ifdef CONFIG_BLK_DEV_ZONED
1804         if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1805                 int devi = f2fs_bdev_index(sbi, dc->bdev);
1806
1807                 if (devi < 0) {
1808                         mutex_unlock(&dcc->cmd_lock);
1809                         return;
1810                 }
1811
1812                 if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1813                         /* force submit zone reset */
1814                         if (dc->state == D_PREP)
1815                                 __submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1816                                                         &dcc->wait_list, NULL);
1817                         dc->ref++;
1818                         mutex_unlock(&dcc->cmd_lock);
1819                         /* wait zone reset */
1820                         __wait_one_discard_bio(sbi, dc);
1821                         return;
1822                 }
1823         }
1824 #endif
1825         if (dc) {
1826                 if (dc->state == D_PREP) {
1827                         __punch_discard_cmd(sbi, dc, blkaddr);
1828                 } else {
1829                         dc->ref++;
1830                         need_wait = true;
1831                 }
1832         }
1833         mutex_unlock(&dcc->cmd_lock);
1834
1835         if (need_wait)
1836                 __wait_one_discard_bio(sbi, dc);
1837 }
1838
1839 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1840 {
1841         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1842
1843         if (dcc && dcc->f2fs_issue_discard) {
1844                 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1845
1846                 dcc->f2fs_issue_discard = NULL;
1847                 kthread_stop(discard_thread);
1848         }
1849 }
1850
1851 /**
1852  * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1853  * @sbi: the f2fs_sb_info data for discard cmd to issue
1854  *
1855  * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped
1856  *
1857  * Return true if all discard commands were issued or none needed issuing, otherwise return false.
1858  */
1859 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1860 {
1861         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1862         struct discard_policy dpolicy;
1863         bool dropped;
1864
1865         if (!atomic_read(&dcc->discard_cmd_cnt))
1866                 return true;
1867
1868         __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1869                                         dcc->discard_granularity);
1870         __issue_discard_cmd(sbi, &dpolicy);
1871         dropped = __drop_discard_cmd(sbi);
1872
1873         /* just to make sure there are no pending discard commands */
1874         __wait_all_discard_cmd(sbi, NULL);
1875
1876         f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1877         return !dropped;
1878 }
1879
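/*
 * Background kthread that periodically issues queued discard commands. It
 * switches to a more aggressive policy under urgent GC or memory pressure
 * and adjusts its sleep interval based on how much work was done.
 */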
1880 static int issue_discard_thread(void *data)
1881 {
1882         struct f2fs_sb_info *sbi = data;
1883         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1884         wait_queue_head_t *q = &dcc->discard_wait_queue;
1885         struct discard_policy dpolicy;
1886         unsigned int wait_ms = dcc->min_discard_issue_time;
1887         int issued;
1888
1889         set_freezable();
1890
1891         do {
1892                 wait_event_freezable_timeout(*q,
1893                                 kthread_should_stop() || dcc->discard_wake,
1894                                 msecs_to_jiffies(wait_ms));
1895
1896                 if (sbi->gc_mode == GC_URGENT_HIGH ||
1897                         !f2fs_available_free_memory(sbi, DISCARD_CACHE))
1898                         __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1899                                                 MIN_DISCARD_GRANULARITY);
1900                 else
1901                         __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1902                                                 dcc->discard_granularity);
1903
1904                 if (dcc->discard_wake)
1905                         dcc->discard_wake = false;
1906
1907                 /* clean up pending candidates before going to sleep */
1908                 if (atomic_read(&dcc->queued_discard))
1909                         __wait_all_discard_cmd(sbi, NULL);
1910
1911                 if (f2fs_readonly(sbi->sb))
1912                         continue;
1913                 if (kthread_should_stop())
1914                         return 0;
1915                 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1916                         !atomic_read(&dcc->discard_cmd_cnt)) {
1917                         wait_ms = dpolicy.max_interval;
1918                         continue;
1919                 }
1920
1921                 sb_start_intwrite(sbi->sb);
1922
1923                 issued = __issue_discard_cmd(sbi, &dpolicy);
1924                 if (issued > 0) {
1925                         __wait_all_discard_cmd(sbi, &dpolicy);
1926                         wait_ms = dpolicy.min_interval;
1927                 } else if (issued == -1) {
1928                         wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1929                         if (!wait_ms)
1930                                 wait_ms = dpolicy.mid_interval;
1931                 } else {
1932                         wait_ms = dpolicy.max_interval;
1933                 }
1934                 if (!atomic_read(&dcc->discard_cmd_cnt))
1935                         wait_ms = dpolicy.max_interval;
1936
1937                 sb_end_intwrite(sbi->sb);
1938
1939         } while (!kthread_should_stop());
1940         return 0;
1941 }
1942
1943 #ifdef CONFIG_BLK_DEV_ZONED
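/*
 * On zoned devices, discard a sequential zone by resetting its write pointer
 * (the range must cover exactly one whole zone); conventional zones fall
 * back to a regular discard.
 */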
1944 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1945                 struct block_device *bdev, block_t blkstart, block_t blklen)
1946 {
1947         sector_t sector, nr_sects;
1948         block_t lblkstart = blkstart;
1949         int devi = 0;
1950         u64 remainder = 0;
1951
1952         if (f2fs_is_multi_device(sbi)) {
1953                 devi = f2fs_target_device_index(sbi, blkstart);
1954                 if (blkstart < FDEV(devi).start_blk ||
1955                     blkstart > FDEV(devi).end_blk) {
1956                         f2fs_err(sbi, "Invalid block %x", blkstart);
1957                         return -EIO;
1958                 }
1959                 blkstart -= FDEV(devi).start_blk;
1960         }
1961
1962         /* For sequential zones, reset the zone write pointer */
1963         if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1964                 sector = SECTOR_FROM_BLOCK(blkstart);
1965                 nr_sects = SECTOR_FROM_BLOCK(blklen);
1966                 div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1967
1968                 if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1969                         f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1970                                  devi, sbi->s_ndevs ? FDEV(devi).path : "",
1971                                  blkstart, blklen);
1972                         return -EIO;
1973                 }
1974
1975                 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1976                         trace_f2fs_issue_reset_zone(bdev, blkstart);
1977                         return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1978                                                 sector, nr_sects, GFP_NOFS);
1979                 }
1980
1981                 __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
1982                 return 0;
1983         }
1984
1985         /* For conventional zones, use regular discard if supported */
1986         __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1987         return 0;
1988 }
1989 #endif
1990
1991 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1992                 struct block_device *bdev, block_t blkstart, block_t blklen)
1993 {
1994 #ifdef CONFIG_BLK_DEV_ZONED
1995         if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1996                 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1997 #endif
1998         __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1999         return 0;
2000 }
2001
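/*
 * Issue a discard for [blkstart, blkstart + blklen), splitting the range at
 * device boundaries and updating the per-segment discard bitmaps.
 */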
2002 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2003                                 block_t blkstart, block_t blklen)
2004 {
2005         sector_t start = blkstart, len = 0;
2006         struct block_device *bdev;
2007         struct seg_entry *se;
2008         unsigned int offset;
2009         block_t i;
2010         int err = 0;
2011
2012         bdev = f2fs_target_device(sbi, blkstart, NULL);
2013
2014         for (i = blkstart; i < blkstart + blklen; i++, len++) {
2015                 if (i != start) {
2016                         struct block_device *bdev2 =
2017                                 f2fs_target_device(sbi, i, NULL);
2018
2019                         if (bdev2 != bdev) {
2020                                 err = __issue_discard_async(sbi, bdev,
2021                                                 start, len);
2022                                 if (err)
2023                                         return err;
2024                                 bdev = bdev2;
2025                                 start = i;
2026                                 len = 0;
2027                         }
2028                 }
2029
2030                 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2031                 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2032
2033                 if (f2fs_block_unit_discard(sbi) &&
2034                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2035                         sbi->discard_blks--;
2036         }
2037
2038         if (len)
2039                 err = __issue_discard_async(sbi, bdev, start, len);
2040         return err;
2041 }
2042
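/*
 * Collect the discardable block ranges of the segment being checkpointed
 * into discard entries; in check_only mode just report whether any exists.
 */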
2043 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2044                                                         bool check_only)
2045 {
2046         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2047         int max_blocks = sbi->blocks_per_seg;
2048         struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2049         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2050         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2051         unsigned long *discard_map = (unsigned long *)se->discard_map;
2052         unsigned long *dmap = SIT_I(sbi)->tmp_map;
2053         unsigned int start = 0, end = -1;
2054         bool force = (cpc->reason & CP_DISCARD);
2055         struct discard_entry *de = NULL;
2056         struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2057         int i;
2058
2059         if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
2060                         !f2fs_block_unit_discard(sbi))
2061                 return false;
2062
2063         if (!force) {
2064                 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2065                         SM_I(sbi)->dcc_info->nr_discards >=
2066                                 SM_I(sbi)->dcc_info->max_discards)
2067                         return false;
2068         }
2069
2070         /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2071         for (i = 0; i < entries; i++)
2072                 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2073                                 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2074
2075         while (force || SM_I(sbi)->dcc_info->nr_discards <=
2076                                 SM_I(sbi)->dcc_info->max_discards) {
2077                 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
2078                 if (start >= max_blocks)
2079                         break;
2080
2081                 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
2082                 if (force && start && end != max_blocks
2083                                         && (end - start) < cpc->trim_minlen)
2084                         continue;
2085
2086                 if (check_only)
2087                         return true;
2088
2089                 if (!de) {
2090                         de = f2fs_kmem_cache_alloc(discard_entry_slab,
2091                                                 GFP_F2FS_ZERO, true, NULL);
2092                         de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2093                         list_add_tail(&de->list, head);
2094                 }
2095
2096                 for (i = start; i < end; i++)
2097                         __set_bit_le(i, (void *)de->discard_map);
2098
2099                 SM_I(sbi)->dcc_info->nr_discards += end - start;
2100         }
2101         return false;
2102 }
2103
2104 static void release_discard_addr(struct discard_entry *entry)
2105 {
2106         list_del(&entry->list);
2107         kmem_cache_free(discard_entry_slab, entry);
2108 }
2109
2110 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2111 {
2112         struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2113         struct discard_entry *entry, *this;
2114
2115         /* drop caches */
2116         list_for_each_entry_safe(entry, this, head, list)
2117                 release_discard_addr(entry);
2118 }
2119
2120 /*
2121  * Should call f2fs_clear_prefree_segments after checkpoint is done.
2122  */
2123 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2124 {
2125         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2126         unsigned int segno;
2127
2128         mutex_lock(&dirty_i->seglist_lock);
2129         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2130                 __set_test_and_free(sbi, segno, false);
2131         mutex_unlock(&dirty_i->seglist_lock);
2132 }
2133
2134 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2135                                                 struct cp_control *cpc)
2136 {
2137         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2138         struct list_head *head = &dcc->entry_list;
2139         struct discard_entry *entry, *this;
2140         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2141         unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2142         unsigned int start = 0, end = -1;
2143         unsigned int secno, start_segno;
2144         bool force = (cpc->reason & CP_DISCARD);
2145         bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2146                                                 DISCARD_UNIT_SECTION;
2147
2148         if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2149                 section_alignment = true;
2150
2151         mutex_lock(&dirty_i->seglist_lock);
2152
2153         while (1) {
2154                 int i;
2155
2156                 if (section_alignment && end != -1)
2157                         end--;
2158                 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2159                 if (start >= MAIN_SEGS(sbi))
2160                         break;
2161                 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2162                                                                 start + 1);
2163
2164                 if (section_alignment) {
2165                         start = rounddown(start, sbi->segs_per_sec);
2166                         end = roundup(end, sbi->segs_per_sec);
2167                 }
2168
2169                 for (i = start; i < end; i++) {
2170                         if (test_and_clear_bit(i, prefree_map))
2171                                 dirty_i->nr_dirty[PRE]--;
2172                 }
2173
2174                 if (!f2fs_realtime_discard_enable(sbi))
2175                         continue;
2176
2177                 if (force && start >= cpc->trim_start &&
2178                                         (end - 1) <= cpc->trim_end)
2179                         continue;
2180
2181                 /* Should cover 2MB zoned device for zone-based reset */
2182                 if (!f2fs_sb_has_blkzoned(sbi) &&
2183                     (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2184                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2185                                 (end - start) << sbi->log_blocks_per_seg);
2186                         continue;
2187                 }
2188 next:
2189                 secno = GET_SEC_FROM_SEG(sbi, start);
2190                 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2191                 if (!IS_CURSEC(sbi, secno) &&
2192                         !get_valid_blocks(sbi, start, true))
2193                         f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2194                                 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2195
2196                 start = start_segno + sbi->segs_per_sec;
2197                 if (start < end)
2198                         goto next;
2199                 else
2200                         end = start - 1;
2201         }
2202         mutex_unlock(&dirty_i->seglist_lock);
2203
2204         if (!f2fs_block_unit_discard(sbi))
2205                 goto wakeup;
2206
2207         /* send small discards */
2208         list_for_each_entry_safe(entry, this, head, list) {
2209                 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2210                 bool is_valid = test_bit_le(0, entry->discard_map);
2211
2212 find_next:
2213                 if (is_valid) {
2214                         next_pos = find_next_zero_bit_le(entry->discard_map,
2215                                         sbi->blocks_per_seg, cur_pos);
2216                         len = next_pos - cur_pos;
2217
2218                         if (f2fs_sb_has_blkzoned(sbi) ||
2219                             (force && len < cpc->trim_minlen))
2220                                 goto skip;
2221
2222                         f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2223                                                                         len);
2224                         total_len += len;
2225                 } else {
2226                         next_pos = find_next_bit_le(entry->discard_map,
2227                                         sbi->blocks_per_seg, cur_pos);
2228                 }
2229 skip:
2230                 cur_pos = next_pos;
2231                 is_valid = !is_valid;
2232
2233                 if (cur_pos < sbi->blocks_per_seg)
2234                         goto find_next;
2235
2236                 release_discard_addr(entry);
2237                 dcc->nr_discards -= total_len;
2238         }
2239
2240 wakeup:
2241         wake_up_discard_thread(sbi, false);
2242 }
2243
2244 int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2245 {
2246         dev_t dev = sbi->sb->s_bdev->bd_dev;
2247         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2248         int err = 0;
2249
2250         if (!f2fs_realtime_discard_enable(sbi))
2251                 return 0;
2252
2253         dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2254                                 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2255         if (IS_ERR(dcc->f2fs_issue_discard)) {
2256                 err = PTR_ERR(dcc->f2fs_issue_discard);
2257                 dcc->f2fs_issue_discard = NULL;
2258         }
2259
2260         return err;
2261 }
2262
2263 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2264 {
2265         struct discard_cmd_control *dcc;
2266         int err = 0, i;
2267
2268         if (SM_I(sbi)->dcc_info) {
2269                 dcc = SM_I(sbi)->dcc_info;
2270                 goto init_thread;
2271         }
2272
2273         dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2274         if (!dcc)
2275                 return -ENOMEM;
2276
2277         dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2278         dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2279         dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2280         dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
2281         if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2282                 dcc->discard_granularity = sbi->blocks_per_seg;
2283         else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2284                 dcc->discard_granularity = BLKS_PER_SEC(sbi);
2285
2286         INIT_LIST_HEAD(&dcc->entry_list);
2287         for (i = 0; i < MAX_PLIST_NUM; i++)
2288                 INIT_LIST_HEAD(&dcc->pend_list[i]);
2289         INIT_LIST_HEAD(&dcc->wait_list);
2290         INIT_LIST_HEAD(&dcc->fstrim_list);
2291         mutex_init(&dcc->cmd_lock);
2292         atomic_set(&dcc->issued_discard, 0);
2293         atomic_set(&dcc->queued_discard, 0);
2294         atomic_set(&dcc->discard_cmd_cnt, 0);
2295         dcc->nr_discards = 0;
2296         dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2297         dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2298         dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2299         dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2300         dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2301         dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2302         dcc->undiscard_blks = 0;
2303         dcc->next_pos = 0;
2304         dcc->root = RB_ROOT_CACHED;
2305         dcc->rbtree_check = false;
2306
2307         init_waitqueue_head(&dcc->discard_wait_queue);
2308         SM_I(sbi)->dcc_info = dcc;
2309 init_thread:
2310         err = f2fs_start_discard_thread(sbi);
2311         if (err) {
2312                 kfree(dcc);
2313                 SM_I(sbi)->dcc_info = NULL;
2314         }
2315
2316         return err;
2317 }
2318
2319 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2320 {
2321         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2322
2323         if (!dcc)
2324                 return;
2325
2326         f2fs_stop_discard_thread(sbi);
2327
2328         /*
2329          * Recovery can cache discard commands, so in error path of
2330          * fill_super(), it needs to give a chance to handle them.
2331          */
2332         f2fs_issue_discard_timeout(sbi);
2333
2334         kfree(dcc);
2335         SM_I(sbi)->dcc_info = NULL;
2336 }
2337
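/* Mark the SIT entry of @segno dirty; returns true if it was already dirty. */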
2338 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2339 {
2340         struct sit_info *sit_i = SIT_I(sbi);
2341
2342         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2343                 sit_i->dirty_sentries++;
2344                 return false;
2345         }
2346
2347         return true;
2348 }
2349
2350 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2351                                         unsigned int segno, int modified)
2352 {
2353         struct seg_entry *se = get_seg_entry(sbi, segno);
2354
2355         se->type = type;
2356         if (modified)
2357                 __mark_sit_entry_dirty(sbi, segno);
2358 }
2359
2360 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2361                                                                 block_t blkaddr)
2362 {
2363         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2364
2365         if (segno == NULL_SEGNO)
2366                 return 0;
2367         return get_seg_entry(sbi, segno)->mtime;
2368 }
2369
2370 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2371                                                 unsigned long long old_mtime)
2372 {
2373         struct seg_entry *se;
2374         unsigned int segno = GET_SEGNO(sbi, blkaddr);
2375         unsigned long long ctime = get_mtime(sbi, false);
2376         unsigned long long mtime = old_mtime ? old_mtime : ctime;
2377
2378         if (segno == NULL_SEGNO)
2379                 return;
2380
2381         se = get_seg_entry(sbi, segno);
2382
2383         if (!se->mtime)
2384                 se->mtime = mtime;
2385         else
2386                 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2387                                                 se->valid_blocks + 1);
2388
2389         if (ctime > SIT_I(sbi)->max_mtime)
2390                 SIT_I(sbi)->max_mtime = ctime;
2391 }
2392
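/*
 * Apply a valid-block delta (@del) at @blkaddr to its SIT entry, keeping the
 * valid, checkpoint and discard bitmaps and the block counters consistent.
 */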
2393 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2394 {
2395         struct seg_entry *se;
2396         unsigned int segno, offset;
2397         long int new_vblocks;
2398         bool exist;
2399 #ifdef CONFIG_F2FS_CHECK_FS
2400         bool mir_exist;
2401 #endif
2402
2403         segno = GET_SEGNO(sbi, blkaddr);
2404
2405         se = get_seg_entry(sbi, segno);
2406         new_vblocks = se->valid_blocks + del;
2407         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2408
2409         f2fs_bug_on(sbi, (new_vblocks < 0 ||
2410                         (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2411
2412         se->valid_blocks = new_vblocks;
2413
2414         /* Update valid block bitmap */
2415         if (del > 0) {
2416                 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2417 #ifdef CONFIG_F2FS_CHECK_FS
2418                 mir_exist = f2fs_test_and_set_bit(offset,
2419                                                 se->cur_valid_map_mir);
2420                 if (unlikely(exist != mir_exist)) {
2421                         f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2422                                  blkaddr, exist);
2423                         f2fs_bug_on(sbi, 1);
2424                 }
2425 #endif
2426                 if (unlikely(exist)) {
2427                         f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2428                                  blkaddr);
2429                         f2fs_bug_on(sbi, 1);
2430                         se->valid_blocks--;
2431                         del = 0;
2432                 }
2433
2434                 if (f2fs_block_unit_discard(sbi) &&
2435                                 !f2fs_test_and_set_bit(offset, se->discard_map))
2436                         sbi->discard_blks--;
2437
2438                 /*
2439                  * SSR should never reuse a block which is checkpointed
2440                  * or newly invalidated.
2441                  */
2442                 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2443                         if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2444                                 se->ckpt_valid_blocks++;
2445                 }
2446         } else {
2447                 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2448 #ifdef CONFIG_F2FS_CHECK_FS
2449                 mir_exist = f2fs_test_and_clear_bit(offset,
2450                                                 se->cur_valid_map_mir);
2451                 if (unlikely(exist != mir_exist)) {
2452                         f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2453                                  blkaddr, exist);
2454                         f2fs_bug_on(sbi, 1);
2455                 }
2456 #endif
2457                 if (unlikely(!exist)) {
2458                         f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2459                                  blkaddr);
2460                         f2fs_bug_on(sbi, 1);
2461                         se->valid_blocks++;
2462                         del = 0;
2463                 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2464                         /*
2465                          * If checkpoints are off, we must not reuse data that
2466                          * was used in the previous checkpoint. If it was used
2467                          * before, we must track that to know how much space we
2468                          * really have.
2469                          */
2470                         if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2471                                 spin_lock(&sbi->stat_lock);
2472                                 sbi->unusable_block_count++;
2473                                 spin_unlock(&sbi->stat_lock);
2474                         }
2475                 }
2476
2477                 if (f2fs_block_unit_discard(sbi) &&
2478                         f2fs_test_and_clear_bit(offset, se->discard_map))
2479                         sbi->discard_blks++;
2480         }
2481         if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2482                 se->ckpt_valid_blocks += del;
2483
2484         __mark_sit_entry_dirty(sbi, segno);
2485
2486         /* update total number of valid blocks to be written in ckpt area */
2487         SIT_I(sbi)->written_valid_blocks += del;
2488
2489         if (__is_large_section(sbi))
2490                 get_sec_entry(sbi, segno)->valid_blocks += del;
2491 }
2492
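/*
 * Invalidate the block at @addr: drop it from internal caches, decrease the
 * valid block count in its SIT entry and mark the segment dirty.
 */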
2493 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2494 {
2495         unsigned int segno = GET_SEGNO(sbi, addr);
2496         struct sit_info *sit_i = SIT_I(sbi);
2497
2498         f2fs_bug_on(sbi, addr == NULL_ADDR);
2499         if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2500                 return;
2501
2502         f2fs_invalidate_internal_cache(sbi, addr);
2503
2504         /* add it into sit main buffer */
2505         down_write(&sit_i->sentry_lock);
2506
2507         update_segment_mtime(sbi, addr, 0);
2508         update_sit_entry(sbi, addr, -1);
2509
2510         /* add it into dirty seglist */
2511         locate_dirty_segment(sbi, segno);
2512
2513         up_write(&sit_i->sentry_lock);
2514 }
2515
2516 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2517 {
2518         struct sit_info *sit_i = SIT_I(sbi);
2519         unsigned int segno, offset;
2520         struct seg_entry *se;
2521         bool is_cp = false;
2522
2523         if (!__is_valid_data_blkaddr(blkaddr))
2524                 return true;
2525
2526         down_read(&sit_i->sentry_lock);
2527
2528         segno = GET_SEGNO(sbi, blkaddr);
2529         se = get_seg_entry(sbi, segno);
2530         offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2531
2532         if (f2fs_test_bit(offset, se->ckpt_valid_map))
2533                 is_cp = true;
2534
2535         up_read(&sit_i->sentry_lock);
2536
2537         return is_cp;
2538 }
2539
2540 static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2541 {
2542         struct curseg_info *curseg = CURSEG_I(sbi, type);
2543
2544         if (sbi->ckpt->alloc_type[type] == SSR)
2545                 return sbi->blocks_per_seg;
2546         return curseg->next_blkoff;
2547 }
2548
2549 /*
2550  * Calculate the number of current summary pages for writing
2551  */
2552 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2553 {
2554         int valid_sum_count = 0;
2555         int i, sum_in_page;
2556
2557         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2558                 if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2559                         valid_sum_count +=
2560                                 le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2561                 else
2562                         valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2563         }
2564
2565         sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2566                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2567         if (valid_sum_count <= sum_in_page)
2568                 return 1;
2569         else if ((valid_sum_count - sum_in_page) <=
2570                 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2571                 return 2;
2572         return 3;
2573 }
2574
2575 /*
2576  * Caller should put this summary page
2577  */
2578 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2579 {
2580         if (unlikely(f2fs_cp_error(sbi)))
2581                 return ERR_PTR(-EIO);
2582         return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2583 }
2584
2585 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2586                                         void *src, block_t blk_addr)
2587 {
2588         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2589
2590         memcpy(page_address(page), src, PAGE_SIZE);
2591         set_page_dirty(page);
2592         f2fs_put_page(page, 1);
2593 }
2594
2595 static void write_sum_page(struct f2fs_sb_info *sbi,
2596                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
2597 {
2598         f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2599 }
2600
2601 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2602                                                 int type, block_t blk_addr)
2603 {
2604         struct curseg_info *curseg = CURSEG_I(sbi, type);
2605         struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2606         struct f2fs_summary_block *src = curseg->sum_blk;
2607         struct f2fs_summary_block *dst;
2608
2609         dst = (struct f2fs_summary_block *)page_address(page);
2610         memset(dst, 0, PAGE_SIZE);
2611
2612         mutex_lock(&curseg->curseg_mutex);
2613
2614         down_read(&curseg->journal_rwsem);
2615         memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2616         up_read(&curseg->journal_rwsem);
2617
2618         memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2619         memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2620
2621         mutex_unlock(&curseg->curseg_mutex);
2622
2623         set_page_dirty(page);
2624         f2fs_put_page(page, 1);
2625 }
2626
2627 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2628                                 struct curseg_info *curseg, int type)
2629 {
2630         unsigned int segno = curseg->segno + 1;
2631         struct free_segmap_info *free_i = FREE_I(sbi);
2632
2633         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2634                 return !test_bit(segno, free_i->free_segmap);
2635         return 0;
2636 }
2637
2638 /*
2639  * Find a new segment from the free segments bitmap in the right order.
2640  * This function must always succeed; otherwise it is a BUG.
2641  */
2642 static void get_new_segment(struct f2fs_sb_info *sbi,
2643                         unsigned int *newseg, bool new_sec, int dir)
2644 {
2645         struct free_segmap_info *free_i = FREE_I(sbi);
2646         unsigned int segno, secno, zoneno;
2647         unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2648         unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2649         unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2650         unsigned int left_start = hint;
2651         bool init = true;
2652         int go_left = 0;
2653         int i;
2654
2655         spin_lock(&free_i->segmap_lock);
2656
2657         if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2658                 segno = find_next_zero_bit(free_i->free_segmap,
2659                         GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2660                 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2661                         goto got_it;
2662         }
2663 find_other_zone:
2664         secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2665         if (secno >= MAIN_SECS(sbi)) {
2666                 if (dir == ALLOC_RIGHT) {
2667                         secno = find_first_zero_bit(free_i->free_secmap,
2668                                                         MAIN_SECS(sbi));
2669                         f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2670                 } else {
2671                         go_left = 1;
2672                         left_start = hint - 1;
2673                 }
2674         }
2675         if (go_left == 0)
2676                 goto skip_left;
2677
2678         while (test_bit(left_start, free_i->free_secmap)) {
2679                 if (left_start > 0) {
2680                         left_start--;
2681                         continue;
2682                 }
2683                 left_start = find_first_zero_bit(free_i->free_secmap,
2684                                                         MAIN_SECS(sbi));
2685                 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2686                 break;
2687         }
2688         secno = left_start;
2689 skip_left:
2690         segno = GET_SEG_FROM_SEC(sbi, secno);
2691         zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2692
2693         /* give up on finding another zone */
2694         if (!init)
2695                 goto got_it;
2696         if (sbi->secs_per_zone == 1)
2697                 goto got_it;
2698         if (zoneno == old_zoneno)
2699                 goto got_it;
2700         if (dir == ALLOC_LEFT) {
2701                 if (!go_left && zoneno + 1 >= total_zones)
2702                         goto got_it;
2703                 if (go_left && zoneno == 0)
2704                         goto got_it;
2705         }
2706         for (i = 0; i < NR_CURSEG_TYPE; i++)
2707                 if (CURSEG_I(sbi, i)->zone == zoneno)
2708                         break;
2709
2710         if (i < NR_CURSEG_TYPE) {
2711                 /* zone is in use, try another */
2712                 if (go_left)
2713                         hint = zoneno * sbi->secs_per_zone - 1;
2714                 else if (zoneno + 1 >= total_zones)
2715                         hint = 0;
2716                 else
2717                         hint = (zoneno + 1) * sbi->secs_per_zone;
2718                 init = false;
2719                 goto find_other_zone;
2720         }
2721 got_it:
2722         /* set it as dirty segment in free segmap */
2723         f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2724         __set_inuse(sbi, segno);
2725         *newseg = segno;
2726         spin_unlock(&free_i->segmap_lock);
2727 }
2728
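/*
 * Point the current segment at curseg->next_segno and reset its in-memory
 * summary footer for the newly selected segment.
 */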
2729 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2730 {
2731         struct curseg_info *curseg = CURSEG_I(sbi, type);
2732         struct summary_footer *sum_footer;
2733         unsigned short seg_type = curseg->seg_type;
2734
2735         curseg->inited = true;
2736         curseg->segno = curseg->next_segno;
2737         curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2738         curseg->next_blkoff = 0;
2739         curseg->next_segno = NULL_SEGNO;
2740
2741         sum_footer = &(curseg->sum_blk->footer);
2742         memset(sum_footer, 0, sizeof(struct summary_footer));
2743
2744         sanity_check_seg_type(sbi, seg_type);
2745
2746         if (IS_DATASEG(seg_type))
2747                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2748         if (IS_NODESEG(seg_type))
2749                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2750         __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2751 }
2752
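/*
 * Choose the hint segment number from which get_new_segment() will start
 * searching, based on the allocation mode, heap policy and randomization.
 */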
2753 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2754 {
2755         struct curseg_info *curseg = CURSEG_I(sbi, type);
2756         unsigned short seg_type = curseg->seg_type;
2757
2758         sanity_check_seg_type(sbi, seg_type);
2759         if (f2fs_need_rand_seg(sbi))
2760                 return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
2761
2762         /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2763         if (__is_large_section(sbi))
2764                 return curseg->segno;
2765
2766         /* the in-memory log may not be located on any segment after mount */
2767         if (!curseg->inited)
2768                 return 0;
2769
2770         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2771                 return 0;
2772
2773         if (test_opt(sbi, NOHEAP) &&
2774                 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2775                 return 0;
2776
2777         if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2778                 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2779
2780         /* find segments from 0 to reuse freed segments */
2781         if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2782                 return 0;
2783
2784         return curseg->segno;
2785 }
2786
2787 /*
2788  * Allocate a current working segment.
2789  * This function always allocates a free segment in LFS manner.
2790  */
2791 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2792 {
2793         struct curseg_info *curseg = CURSEG_I(sbi, type);
2794         unsigned short seg_type = curseg->seg_type;
2795         unsigned int segno = curseg->segno;
2796         int dir = ALLOC_LEFT;
2797
2798         if (curseg->inited)
2799                 write_sum_page(sbi, curseg->sum_blk,
2800                                 GET_SUM_BLOCK(sbi, segno));
2801         if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2802                 dir = ALLOC_RIGHT;
2803
2804         if (test_opt(sbi, NOHEAP))
2805                 dir = ALLOC_RIGHT;
2806
2807         segno = __get_next_segno(sbi, type);
2808         get_new_segment(sbi, &segno, new_sec, dir);
2809         curseg->next_segno = segno;
2810         reset_curseg(sbi, type, 1);
2811         curseg->alloc_type = LFS;
2812         if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2813                 curseg->fragment_remained_chunk =
2814                                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2815 }
2816
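/*
 * Return the first block offset at or after @start that is free in both the
 * checkpointed and the current valid bitmaps of segment @segno.
 */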
2817 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2818                                         int segno, block_t start)
2819 {
2820         struct seg_entry *se = get_seg_entry(sbi, segno);
2821         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2822         unsigned long *target_map = SIT_I(sbi)->tmp_map;
2823         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2824         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2825         int i;
2826
2827         for (i = 0; i < entries; i++)
2828                 target_map[i] = ckpt_map[i] | cur_map[i];
2829
2830         return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2831 }
2832
2833 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2834                 struct curseg_info *seg)
2835 {
2836         return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2837 }
2838
2839 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2840 {
2841         return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
2842 }
2843
2844 /*
2845  * This function always allocates a used segment (from the dirty seglist) in SSR
2846  * manner, so it should recover the existing segment information of valid blocks.
2847  */
2848 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2849 {
2850         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2851         struct curseg_info *curseg = CURSEG_I(sbi, type);
2852         unsigned int new_segno = curseg->next_segno;
2853         struct f2fs_summary_block *sum_node;
2854         struct page *sum_page;
2855
2856         write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2857
2858         __set_test_and_inuse(sbi, new_segno);
2859
2860         mutex_lock(&dirty_i->seglist_lock);
2861         __remove_dirty_segment(sbi, new_segno, PRE);
2862         __remove_dirty_segment(sbi, new_segno, DIRTY);
2863         mutex_unlock(&dirty_i->seglist_lock);
2864
2865         reset_curseg(sbi, type, 1);
2866         curseg->alloc_type = SSR;
2867         curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2868
2869         sum_page = f2fs_get_sum_page(sbi, new_segno);
2870         if (IS_ERR(sum_page)) {
2871                 /* GC won't be able to use stale summary pages due to cp_error */
2872                 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2873                 return;
2874         }
2875         sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2876         memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2877         f2fs_put_page(sum_page, 1);
2878 }
2879
2880 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2881                                 int alloc_mode, unsigned long long age);
2882
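/*
 * Pick the next segment for an ATSSR log (used by ATGC): try an SSR victim
 * matching @target_type and recover its summaries; if none is found, allocate
 * a new cold data segment in LFS manner.
 */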
2883 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2884                                         int target_type, int alloc_mode,
2885                                         unsigned long long age)
2886 {
2887         struct curseg_info *curseg = CURSEG_I(sbi, type);
2888
2889         curseg->seg_type = target_type;
2890
2891         if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2892                 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2893
2894                 curseg->seg_type = se->type;
2895                 change_curseg(sbi, type);
2896         } else {
2897                 /* allocate cold segment by default */
2898                 curseg->seg_type = CURSEG_COLD_DATA;
2899                 new_curseg(sbi, type, true);
2900         }
2901         stat_inc_seg_type(sbi, curseg);
2902 }
2903
2904 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2905 {
2906         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2907
2908         if (!sbi->am.atgc_enabled)
2909                 return;
2910
2911         f2fs_down_read(&SM_I(sbi)->curseg_lock);
2912
2913         mutex_lock(&curseg->curseg_mutex);
2914         down_write(&SIT_I(sbi)->sentry_lock);
2915
2916         get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2917
2918         up_write(&SIT_I(sbi)->sentry_lock);
2919         mutex_unlock(&curseg->curseg_mutex);
2920
2921         f2fs_up_read(&SM_I(sbi)->curseg_lock);
2922 }
2923
2924 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2925 {
2926         __f2fs_init_atgc_curseg(sbi);
2927 }
2928
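/*
 * Save the state of an in-memory current segment of @type: if the segment
 * still holds valid blocks, write out its summary block; otherwise release
 * the empty segment back to the free list.
 */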
2929 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2930 {
2931         struct curseg_info *curseg = CURSEG_I(sbi, type);
2932
2933         mutex_lock(&curseg->curseg_mutex);
2934         if (!curseg->inited)
2935                 goto out;
2936
2937         if (get_valid_blocks(sbi, curseg->segno, false)) {
2938                 write_sum_page(sbi, curseg->sum_blk,
2939                                 GET_SUM_BLOCK(sbi, curseg->segno));
2940         } else {
2941                 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2942                 __set_test_and_free(sbi, curseg->segno, true);
2943                 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2944         }
2945 out:
2946         mutex_unlock(&curseg->curseg_mutex);
2947 }
2948
2949 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2950 {
2951         __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2952
2953         if (sbi->am.atgc_enabled)
2954                 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2955 }
2956
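/*
 * Restore an in-memory current segment of @type: if the saved segment is
 * still empty, mark it in-use again so allocation can continue from it.
 */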
2957 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2958 {
2959         struct curseg_info *curseg = CURSEG_I(sbi, type);
2960
2961         mutex_lock(&curseg->curseg_mutex);
2962         if (!curseg->inited)
2963                 goto out;
2964         if (get_valid_blocks(sbi, curseg->segno, false))
2965                 goto out;
2966
2967         mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2968         __set_test_and_inuse(sbi, curseg->segno);
2969         mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2970 out:
2971         mutex_unlock(&curseg->curseg_mutex);
2972 }
2973
2974 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2975 {
2976         __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2977
2978         if (sbi->am.atgc_enabled)
2979                 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2980 }
2981
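/*
 * Select a victim segment for SSR allocation: try the log's own temperature
 * first, then the other logs of the same kind (node or data), and finally,
 * when checkpointing is disabled, fall back to a segment with no valid
 * blocks.  Returns 1 and sets curseg->next_segno on success, 0 otherwise.
 */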
2982 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2983                                 int alloc_mode, unsigned long long age)
2984 {
2985         struct curseg_info *curseg = CURSEG_I(sbi, type);
2986         unsigned int segno = NULL_SEGNO;
2987         unsigned short seg_type = curseg->seg_type;
2988         int i, cnt;
2989         bool reversed = false;
2990
2991         sanity_check_seg_type(sbi, seg_type);
2992
2993         /* f2fs_need_SSR() has already forced us to do this */
2994         if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2995                 curseg->next_segno = segno;
2996                 return 1;
2997         }
2998
2999         /* For node segments, let's do SSR more intensively */
3000         if (IS_NODESEG(seg_type)) {
3001                 if (seg_type >= CURSEG_WARM_NODE) {
3002                         reversed = true;
3003                         i = CURSEG_COLD_NODE;
3004                 } else {
3005                         i = CURSEG_HOT_NODE;
3006                 }
3007                 cnt = NR_CURSEG_NODE_TYPE;
3008         } else {
3009                 if (seg_type >= CURSEG_WARM_DATA) {
3010                         reversed = true;
3011                         i = CURSEG_COLD_DATA;
3012                 } else {
3013                         i = CURSEG_HOT_DATA;
3014                 }
3015                 cnt = NR_CURSEG_DATA_TYPE;
3016         }
3017
3018         for (; cnt-- > 0; reversed ? i-- : i++) {
3019                 if (i == seg_type)
3020                         continue;
3021                 if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
3022                         curseg->next_segno = segno;
3023                         return 1;
3024                 }
3025         }
3026
3027         /* find valid_blocks=0 in dirty list */
3028         if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3029                 segno = get_free_segment(sbi);
3030                 if (segno != NULL_SEGNO) {
3031                         curseg->next_segno = segno;
3032                         return 1;
3033                 }
3034         }
3035         return 0;
3036 }
3037
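/*
 * Decide whether a full current segment should be replaced by a brand new
 * segment (LFS) rather than switched to an SSR victim.
 */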
3038 static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3039 {
3040         struct curseg_info *curseg = CURSEG_I(sbi, type);
3041
3042         if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3043             curseg->seg_type == CURSEG_WARM_NODE)
3044                 return true;
3045         if (curseg->alloc_type == LFS &&
3046             is_next_segment_free(sbi, curseg, type) &&
3047             likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3048                 return true;
3049         if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3050                 return true;
3051         return false;
3052 }
3053
3054 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3055                                         unsigned int start, unsigned int end)
3056 {
3057         struct curseg_info *curseg = CURSEG_I(sbi, type);
3058         unsigned int segno;
3059
3060         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3061         mutex_lock(&curseg->curseg_mutex);
3062         down_write(&SIT_I(sbi)->sentry_lock);
3063
3064         segno = CURSEG_I(sbi, type)->segno;
3065         if (segno < start || segno > end)
3066                 goto unlock;
3067
3068         if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3069                 change_curseg(sbi, type);
3070         else
3071                 new_curseg(sbi, type, true);
3072
3073         stat_inc_seg_type(sbi, curseg);
3074
3075         locate_dirty_segment(sbi, segno);
3076 unlock:
3077         up_write(&SIT_I(sbi)->sentry_lock);
3078
3079         if (segno != curseg->segno)
3080                 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3081                             type, segno, curseg->segno);
3082
3083         mutex_unlock(&curseg->curseg_mutex);
3084         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3085 }
3086
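/*
 * Move the log of @type onto a freshly allocated segment, unless the current
 * one is still completely empty (both in memory and in the last checkpoint)
 * and @force is not set.
 */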
3087 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3088                                                 bool new_sec, bool force)
3089 {
3090         struct curseg_info *curseg = CURSEG_I(sbi, type);
3091         unsigned int old_segno;
3092
3093         if (!force && curseg->inited &&
3094             !curseg->next_blkoff &&
3095             !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3096             !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3097                 return;
3098
3099         old_segno = curseg->segno;
3100         new_curseg(sbi, type, true);
3101         stat_inc_seg_type(sbi, curseg);
3102         locate_dirty_segment(sbi, old_segno);
3103 }
3104
3105 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3106 {
3107         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3108         down_write(&SIT_I(sbi)->sentry_lock);
3109         __allocate_new_segment(sbi, type, true, force);
3110         up_write(&SIT_I(sbi)->sentry_lock);
3111         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3112 }
3113
3114 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3115 {
3116         int i;
3117
3118         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3119         down_write(&SIT_I(sbi)->sentry_lock);
3120         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3121                 __allocate_new_segment(sbi, i, false, false);
3122         up_write(&SIT_I(sbi)->sentry_lock);
3123         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3124 }
3125
3126 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3127                                                 struct cp_control *cpc)
3128 {
3129         __u64 trim_start = cpc->trim_start;
3130         bool has_candidate = false;
3131
3132         down_write(&SIT_I(sbi)->sentry_lock);
3133         for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3134                 if (add_discard_addrs(sbi, cpc, true)) {
3135                         has_candidate = true;
3136                         break;
3137                 }
3138         }
3139         up_write(&SIT_I(sbi)->sentry_lock);
3140
3141         cpc->trim_start = trim_start;
3142         return has_candidate;
3143 }
3144
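/*
 * Issue the prepared discard commands overlapping [@start, @end] on behalf of
 * FITRIM, honoring the limits in @dpolicy, and return the number of discarded
 * blocks that were waited on.
 */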
3145 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3146                                         struct discard_policy *dpolicy,
3147                                         unsigned int start, unsigned int end)
3148 {
3149         struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3150         struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3151         struct rb_node **insert_p = NULL, *insert_parent = NULL;
3152         struct discard_cmd *dc;
3153         struct blk_plug plug;
3154         int issued;
3155         unsigned int trimmed = 0;
3156
3157 next:
3158         issued = 0;
3159
3160         mutex_lock(&dcc->cmd_lock);
3161         if (unlikely(dcc->rbtree_check))
3162                 f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3163
3164         dc = __lookup_discard_cmd_ret(&dcc->root, start,
3165                                 &prev_dc, &next_dc, &insert_p, &insert_parent);
3166         if (!dc)
3167                 dc = next_dc;
3168
3169         blk_start_plug(&plug);
3170
3171         while (dc && dc->di.lstart <= end) {
3172                 struct rb_node *node;
3173                 int err = 0;
3174
3175                 if (dc->di.len < dpolicy->granularity)
3176                         goto skip;
3177
3178                 if (dc->state != D_PREP) {
3179                         list_move_tail(&dc->list, &dcc->fstrim_list);
3180                         goto skip;
3181                 }
3182
3183                 err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3184
3185                 if (issued >= dpolicy->max_requests) {
3186                         start = dc->di.lstart + dc->di.len;
3187
3188                         if (err)
3189                                 __remove_discard_cmd(sbi, dc);
3190
3191                         blk_finish_plug(&plug);
3192                         mutex_unlock(&dcc->cmd_lock);
3193                         trimmed += __wait_all_discard_cmd(sbi, NULL);
3194                         f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3195                         goto next;
3196                 }
3197 skip:
3198                 node = rb_next(&dc->rb_node);
3199                 if (err)
3200                         __remove_discard_cmd(sbi, dc);
3201                 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3202
3203                 if (fatal_signal_pending(current))
3204                         break;
3205         }
3206
3207         blk_finish_plug(&plug);
3208         mutex_unlock(&dcc->cmd_lock);
3209
3210         return trimmed;
3211 }
3212
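/*
 * FITRIM entry point: trigger a checkpoint to collect discard candidates over
 * the requested range, then issue and wait for them unless runtime discard is
 * enabled.
 */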
3213 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3214 {
3215         __u64 start = F2FS_BYTES_TO_BLK(range->start);
3216         __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3217         unsigned int start_segno, end_segno;
3218         block_t start_block, end_block;
3219         struct cp_control cpc;
3220         struct discard_policy dpolicy;
3221         unsigned long long trimmed = 0;
3222         int err = 0;
3223         bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3224
3225         if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3226                 return -EINVAL;
3227
3228         if (end < MAIN_BLKADDR(sbi))
3229                 goto out;
3230
3231         if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3232                 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3233                 return -EFSCORRUPTED;
3234         }
3235
3236         /* start/end segment number in main_area */
3237         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3238         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3239                                                 GET_SEGNO(sbi, end);
3240         if (need_align) {
3241                 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3242                 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3243         }
3244
3245         cpc.reason = CP_DISCARD;
3246         cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3247         cpc.trim_start = start_segno;
3248         cpc.trim_end = end_segno;
3249
3250         if (sbi->discard_blks == 0)
3251                 goto out;
3252
3253         f2fs_down_write(&sbi->gc_lock);
3254         stat_inc_cp_call_count(sbi, TOTAL_CALL);
3255         err = f2fs_write_checkpoint(sbi, &cpc);
3256         f2fs_up_write(&sbi->gc_lock);
3257         if (err)
3258                 goto out;
3259
3260         /*
3261          * We filed discard candidates, but we don't actually need to wait for
3262          * all of them, since they'll be issued during idle time when the runtime
3263          * discard option is enabled. Such a configuration expects runtime discard
3264          * or periodic fstrim to do the work instead of this synchronous wait.
3265          */
3266         if (f2fs_realtime_discard_enable(sbi))
3267                 goto out;
3268
3269         start_block = START_BLOCK(sbi, start_segno);
3270         end_block = START_BLOCK(sbi, end_segno + 1);
3271
3272         __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3273         trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3274                                         start_block, end_block);
3275
3276         trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3277                                         start_block, end_block);
3278 out:
3279         if (!err)
3280                 range->len = F2FS_BLK_TO_BYTES(trimmed);
3281         return err;
3282 }
3283
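/* Map a block-layer write-life hint onto a data log temperature. */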
3284 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3285 {
3286         switch (hint) {
3287         case WRITE_LIFE_SHORT:
3288                 return CURSEG_HOT_DATA;
3289         case WRITE_LIFE_EXTREME:
3290                 return CURSEG_COLD_DATA;
3291         default:
3292                 return CURSEG_WARM_DATA;
3293         }
3294 }
3295
3296 static int __get_segment_type_2(struct f2fs_io_info *fio)
3297 {
3298         if (fio->type == DATA)
3299                 return CURSEG_HOT_DATA;
3300         else
3301                 return CURSEG_HOT_NODE;
3302 }
3303
3304 static int __get_segment_type_4(struct f2fs_io_info *fio)
3305 {
3306         if (fio->type == DATA) {
3307                 struct inode *inode = fio->page->mapping->host;
3308
3309                 if (S_ISDIR(inode->i_mode))
3310                         return CURSEG_HOT_DATA;
3311                 else
3312                         return CURSEG_COLD_DATA;
3313         } else {
3314                 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3315                         return CURSEG_WARM_NODE;
3316                 else
3317                         return CURSEG_COLD_NODE;
3318         }
3319 }
3320
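/*
 * Classify a data block by the age recorded in the extent cache: young blocks
 * go to the hot log, middle-aged to warm, old to cold.  Returns NO_CHECK_TYPE
 * when no age information is available.
 */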
3321 static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3322 {
3323         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3324         struct extent_info ei = {};
3325
3326         if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3327                 if (!ei.age)
3328                         return NO_CHECK_TYPE;
3329                 if (ei.age <= sbi->hot_data_age_threshold)
3330                         return CURSEG_HOT_DATA;
3331                 if (ei.age <= sbi->warm_data_age_threshold)
3332                         return CURSEG_WARM_DATA;
3333                 return CURSEG_COLD_DATA;
3334         }
3335         return NO_CHECK_TYPE;
3336 }
3337
3338 static int __get_segment_type_6(struct f2fs_io_info *fio)
3339 {
3340         if (fio->type == DATA) {
3341                 struct inode *inode = fio->page->mapping->host;
3342                 int type;
3343
3344                 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3345                         return CURSEG_COLD_DATA_PINNED;
3346
3347                 if (page_private_gcing(fio->page)) {
3348                         if (fio->sbi->am.atgc_enabled &&
3349                                 (fio->io_type == FS_DATA_IO) &&
3350                                 (fio->sbi->gc_mode != GC_URGENT_HIGH))
3351                                 return CURSEG_ALL_DATA_ATGC;
3352                         else
3353                                 return CURSEG_COLD_DATA;
3354                 }
3355                 if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3356                         return CURSEG_COLD_DATA;
3357
3358                 type = __get_age_segment_type(inode, fio->page->index);
3359                 if (type != NO_CHECK_TYPE)
3360                         return type;
3361
3362                 if (file_is_hot(inode) ||
3363                                 is_inode_flag_set(inode, FI_HOT_DATA) ||
3364                                 f2fs_is_cow_file(inode))
3365                         return CURSEG_HOT_DATA;
3366                 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3367         } else {
3368                 if (IS_DNODE(fio->page))
3369                         return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3370                                                 CURSEG_HOT_NODE;
3371                 return CURSEG_COLD_NODE;
3372         }
3373 }
3374
3375 static int __get_segment_type(struct f2fs_io_info *fio)
3376 {
3377         int type = 0;
3378
3379         switch (F2FS_OPTION(fio->sbi).active_logs) {
3380         case 2:
3381                 type = __get_segment_type_2(fio);
3382                 break;
3383         case 4:
3384                 type = __get_segment_type_4(fio);
3385                 break;
3386         case 6:
3387                 type = __get_segment_type_6(fio);
3388                 break;
3389         default:
3390                 f2fs_bug_on(fio->sbi, true);
3391         }
3392
3393         if (IS_HOT(type))
3394                 fio->temp = HOT;
3395         else if (IS_WARM(type))
3396                 fio->temp = WARM;
3397         else
3398                 fio->temp = COLD;
3399         return type;
3400 }
3401
3402 static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3403                 struct curseg_info *seg)
3404 {
3405         /* To allocate block chunks of different sizes, use a random number */
3406         if (--seg->fragment_remained_chunk > 0)
3407                 return;
3408
3409         seg->fragment_remained_chunk =
3410                 get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3411         seg->next_blkoff +=
3412                 get_random_u32_inclusive(1, sbi->max_fragment_hole);
3413 }
3414
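/*
 * Reserve the next free block of the log of @type for @page, record its
 * summary entry, update SIT information for the old and new block addresses,
 * and move the log to another segment once the current one becomes full.
 */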
3415 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3416                 block_t old_blkaddr, block_t *new_blkaddr,
3417                 struct f2fs_summary *sum, int type,
3418                 struct f2fs_io_info *fio)
3419 {
3420         struct sit_info *sit_i = SIT_I(sbi);
3421         struct curseg_info *curseg = CURSEG_I(sbi, type);
3422         unsigned long long old_mtime;
3423         bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3424         struct seg_entry *se = NULL;
3425         bool segment_full = false;
3426
3427         f2fs_down_read(&SM_I(sbi)->curseg_lock);
3428
3429         mutex_lock(&curseg->curseg_mutex);
3430         down_write(&sit_i->sentry_lock);
3431
3432         if (from_gc) {
3433                 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3434                 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3435                 sanity_check_seg_type(sbi, se->type);
3436                 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3437         }
3438         *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3439
3440         f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3441
3442         f2fs_wait_discard_bio(sbi, *new_blkaddr);
3443
3444         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3445         if (curseg->alloc_type == SSR) {
3446                 curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3447         } else {
3448                 curseg->next_blkoff++;
3449                 if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3450                         f2fs_randomize_chunk(sbi, curseg);
3451         }
3452         if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3453                 segment_full = true;
3454         stat_inc_block_count(sbi, curseg);
3455
3456         if (from_gc) {
3457                 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3458         } else {
3459                 update_segment_mtime(sbi, old_blkaddr, 0);
3460                 old_mtime = 0;
3461         }
3462         update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3463
3464         /*
3465          * SIT information should be updated before segment allocation,
3466          * since SSR needs the latest valid block information.
3467          */
3468         update_sit_entry(sbi, *new_blkaddr, 1);
3469         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3470                 update_sit_entry(sbi, old_blkaddr, -1);
3471
3472         /*
3473          * If the current segment is full, flush it out and replace it with a
3474          * new segment.
3475          */
3476         if (segment_full) {
3477                 if (from_gc) {
3478                         get_atssr_segment(sbi, type, se->type,
3479                                                 AT_SSR, se->mtime);
3480                 } else {
3481                         if (need_new_seg(sbi, type))
3482                                 new_curseg(sbi, type, false);
3483                         else
3484                                 change_curseg(sbi, type);
3485                         stat_inc_seg_type(sbi, curseg);
3486                 }
3487         }
3488         /*
3489          * The segment dirty status should be updated after segment allocation,
3490          * so we only need to update the status once, after the previous
3491          * segment has been closed.
3492          */
3493         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3494         locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3495
3496         if (IS_DATASEG(curseg->seg_type))
3497                 atomic64_inc(&sbi->allocated_data_blocks);
3498
3499         up_write(&sit_i->sentry_lock);
3500
3501         if (page && IS_NODESEG(curseg->seg_type)) {
3502                 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3503
3504                 f2fs_inode_chksum_set(sbi, page);
3505         }
3506
3507         if (fio) {
3508                 struct f2fs_bio_info *io;
3509
3510                 if (F2FS_IO_ALIGNED(sbi))
3511                         fio->retry = 0;
3512
3513                 INIT_LIST_HEAD(&fio->list);
3514                 fio->in_list = 1;
3515                 io = sbi->write_io[fio->type] + fio->temp;
3516                 spin_lock(&io->io_lock);
3517                 list_add_tail(&fio->list, &io->io_list);
3518                 spin_unlock(&io->io_lock);
3519         }
3520
3521         mutex_unlock(&curseg->curseg_mutex);
3522
3523         f2fs_up_read(&SM_I(sbi)->curseg_lock);
3524 }
3525
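/*
 * For multi-device filesystems, mark every device covered by the range
 * [@blkaddr, @blkaddr + @blkcnt) dirty so that fsync and the next checkpoint
 * flush it.
 */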
3526 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3527                                         block_t blkaddr, unsigned int blkcnt)
3528 {
3529         if (!f2fs_is_multi_device(sbi))
3530                 return;
3531
3532         while (1) {
3533                 unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3534                 unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3535
3536                 /* update device state for fsync */
3537                 f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3538
3539                 /* update device state for checkpoint */
3540                 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3541                         spin_lock(&sbi->dev_lock);
3542                         f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3543                         spin_unlock(&sbi->dev_lock);
3544                 }
3545
3546                 if (blkcnt <= blks)
3547                         break;
3548                 blkcnt -= blks;
3549                 blkaddr += blks;
3550         }
3551 }
3552
3553 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3554 {
3555         int type = __get_segment_type(fio);
3556         bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3557
3558         if (keep_order)
3559                 f2fs_down_read(&fio->sbi->io_order_lock);
3560 reallocate:
3561         f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3562                         &fio->new_blkaddr, sum, type, fio);
3563         if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3564                 f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3565
3566         /* writeout dirty page into bdev */
3567         f2fs_submit_page_write(fio);
3568         if (fio->retry) {
3569                 fio->old_blkaddr = fio->new_blkaddr;
3570                 goto reallocate;
3571         }
3572
3573         f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3574
3575         if (keep_order)
3576                 f2fs_up_read(&fio->sbi->io_order_lock);
3577 }
3578
3579 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3580                                         enum iostat_type io_type)
3581 {
3582         struct f2fs_io_info fio = {
3583                 .sbi = sbi,
3584                 .type = META,
3585                 .temp = HOT,
3586                 .op = REQ_OP_WRITE,
3587                 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3588                 .old_blkaddr = page->index,
3589                 .new_blkaddr = page->index,
3590                 .page = page,
3591                 .encrypted_page = NULL,
3592                 .in_list = 0,
3593         };
3594
3595         if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3596                 fio.op_flags &= ~REQ_META;
3597
3598         set_page_writeback(page);
3599         f2fs_submit_page_write(&fio);
3600
3601         stat_inc_meta_count(sbi, page->index);
3602         f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3603 }
3604
3605 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3606 {
3607         struct f2fs_summary sum;
3608
3609         set_summary(&sum, nid, 0, 0);
3610         do_write_page(&sum, fio);
3611
3612         f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3613 }
3614
3615 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3616                                         struct f2fs_io_info *fio)
3617 {
3618         struct f2fs_sb_info *sbi = fio->sbi;
3619         struct f2fs_summary sum;
3620
3621         f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3622         if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3623                 f2fs_update_age_extent_cache(dn);
3624         set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3625         do_write_page(&sum, fio);
3626         f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3627
3628         f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3629 }
3630
3631 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3632 {
3633         int err;
3634         struct f2fs_sb_info *sbi = fio->sbi;
3635         unsigned int segno;
3636
3637         fio->new_blkaddr = fio->old_blkaddr;
3638         /* i/o temperature is needed for passing down write hints */
3639         __get_segment_type(fio);
3640
3641         segno = GET_SEGNO(sbi, fio->new_blkaddr);
3642
3643         if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3644                 set_sbi_flag(sbi, SBI_NEED_FSCK);
3645                 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3646                           __func__, segno);
3647                 err = -EFSCORRUPTED;
3648                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3649                 goto drop_bio;
3650         }
3651
3652         if (f2fs_cp_error(sbi)) {
3653                 err = -EIO;
3654                 goto drop_bio;
3655         }
3656
3657         if (fio->post_read)
3658                 f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3659
3660         stat_inc_inplace_blocks(fio->sbi);
3661
3662         if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3663                 err = f2fs_merge_page_bio(fio);
3664         else
3665                 err = f2fs_submit_page_bio(fio);
3666         if (!err) {
3667                 f2fs_update_device_state(fio->sbi, fio->ino,
3668                                                 fio->new_blkaddr, 1);
3669                 f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3670                                                 fio->io_type, F2FS_BLKSIZE);
3671         }
3672
3673         return err;
3674 drop_bio:
3675         if (fio->bio && *(fio->bio)) {
3676                 struct bio *bio = *(fio->bio);
3677
3678                 bio->bi_status = BLK_STS_IOERR;
3679                 bio_endio(bio);
3680                 *(fio->bio) = NULL;
3681         }
3682         return err;
3683 }
3684
3685 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3686                                                 unsigned int segno)
3687 {
3688         int i;
3689
3690         for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3691                 if (CURSEG_I(sbi, i)->segno == segno)
3692                         break;
3693         }
3694         return i;
3695 }
3696
3697 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3698                                 block_t old_blkaddr, block_t new_blkaddr,
3699                                 bool recover_curseg, bool recover_newaddr,
3700                                 bool from_gc)
3701 {
3702         struct sit_info *sit_i = SIT_I(sbi);
3703         struct curseg_info *curseg;
3704         unsigned int segno, old_cursegno;
3705         struct seg_entry *se;
3706         int type;
3707         unsigned short old_blkoff;
3708         unsigned char old_alloc_type;
3709
3710         segno = GET_SEGNO(sbi, new_blkaddr);
3711         se = get_seg_entry(sbi, segno);
3712         type = se->type;
3713
3714         f2fs_down_write(&SM_I(sbi)->curseg_lock);
3715
3716         if (!recover_curseg) {
3717                 /* for recovery flow */
3718                 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3719                         if (old_blkaddr == NULL_ADDR)
3720                                 type = CURSEG_COLD_DATA;
3721                         else
3722                                 type = CURSEG_WARM_DATA;
3723                 }
3724         } else {
3725                 if (IS_CURSEG(sbi, segno)) {
3726                         /* se->type is volatile due to SSR allocation */
3727                         type = __f2fs_get_curseg(sbi, segno);
3728                         f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3729                 } else {
3730                         type = CURSEG_WARM_DATA;
3731                 }
3732         }
3733
3734         f2fs_bug_on(sbi, !IS_DATASEG(type));
3735         curseg = CURSEG_I(sbi, type);
3736
3737         mutex_lock(&curseg->curseg_mutex);
3738         down_write(&sit_i->sentry_lock);
3739
3740         old_cursegno = curseg->segno;
3741         old_blkoff = curseg->next_blkoff;
3742         old_alloc_type = curseg->alloc_type;
3743
3744         /* change the current segment */
3745         if (segno != curseg->segno) {
3746                 curseg->next_segno = segno;
3747                 change_curseg(sbi, type);
3748         }
3749
3750         curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3751         curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3752
3753         if (!recover_curseg || recover_newaddr) {
3754                 if (!from_gc)
3755                         update_segment_mtime(sbi, new_blkaddr, 0);
3756                 update_sit_entry(sbi, new_blkaddr, 1);
3757         }
3758         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3759                 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
3760                 if (!from_gc)
3761                         update_segment_mtime(sbi, old_blkaddr, 0);
3762                 update_sit_entry(sbi, old_blkaddr, -1);
3763         }
3764
3765         locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3766         locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3767
3768         locate_dirty_segment(sbi, old_cursegno);
3769
3770         if (recover_curseg) {
3771                 if (old_cursegno != curseg->segno) {
3772                         curseg->next_segno = old_cursegno;
3773                         change_curseg(sbi, type);
3774                 }
3775                 curseg->next_blkoff = old_blkoff;
3776                 curseg->alloc_type = old_alloc_type;
3777         }
3778
3779         up_write(&sit_i->sentry_lock);
3780         mutex_unlock(&curseg->curseg_mutex);
3781         f2fs_up_write(&SM_I(sbi)->curseg_lock);
3782 }
3783
3784 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3785                                 block_t old_addr, block_t new_addr,
3786                                 unsigned char version, bool recover_curseg,
3787                                 bool recover_newaddr)
3788 {
3789         struct f2fs_summary sum;
3790
3791         set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3792
3793         f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3794                                         recover_curseg, recover_newaddr, false);
3795
3796         f2fs_update_data_blkaddr(dn, new_addr);
3797 }
3798
3799 void f2fs_wait_on_page_writeback(struct page *page,
3800                                 enum page_type type, bool ordered, bool locked)
3801 {
3802         if (PageWriteback(page)) {
3803                 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3804
3805                 /* submit cached LFS IO */
3806                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3807                 /* submit cached IPU IO */
3808                 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3809                 if (ordered) {
3810                         wait_on_page_writeback(page);
3811                         f2fs_bug_on(sbi, locked && PageWriteback(page));
3812                 } else {
3813                         wait_for_stable_page(page);
3814                 }
3815         }
3816 }
3817
3818 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3819 {
3820         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3821         struct page *cpage;
3822
3823         if (!f2fs_post_read_required(inode))
3824                 return;
3825
3826         if (!__is_valid_data_blkaddr(blkaddr))
3827                 return;
3828
3829         cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3830         if (cpage) {
3831                 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3832                 f2fs_put_page(cpage, 1);
3833         }
3834 }
3835
3836 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3837                                                                 block_t len)
3838 {
3839         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3840         block_t i;
3841
3842         if (!f2fs_post_read_required(inode))
3843                 return;
3844
3845         for (i = 0; i < len; i++)
3846                 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3847
3848         f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
3849 }
3850
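/*
 * Rebuild the three data logs from the compacted summary blocks: the first
 * block starts with the NAT and SIT journals, followed by the packed summary
 * entries of each data log.
 */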
3851 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3852 {
3853         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3854         struct curseg_info *seg_i;
3855         unsigned char *kaddr;
3856         struct page *page;
3857         block_t start;
3858         int i, j, offset;
3859
3860         start = start_sum_block(sbi);
3861
3862         page = f2fs_get_meta_page(sbi, start++);
3863         if (IS_ERR(page))
3864                 return PTR_ERR(page);
3865         kaddr = (unsigned char *)page_address(page);
3866
3867         /* Step 1: restore nat cache */
3868         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3869         memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3870
3871         /* Step 2: restore sit cache */
3872         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3873         memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3874         offset = 2 * SUM_JOURNAL_SIZE;
3875
3876         /* Step 3: restore summary entries */
3877         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3878                 unsigned short blk_off;
3879                 unsigned int segno;
3880
3881                 seg_i = CURSEG_I(sbi, i);
3882                 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3883                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3884                 seg_i->next_segno = segno;
3885                 reset_curseg(sbi, i, 0);
3886                 seg_i->alloc_type = ckpt->alloc_type[i];
3887                 seg_i->next_blkoff = blk_off;
3888
3889                 if (seg_i->alloc_type == SSR)
3890                         blk_off = sbi->blocks_per_seg;
3891
3892                 for (j = 0; j < blk_off; j++) {
3893                         struct f2fs_summary *s;
3894
3895                         s = (struct f2fs_summary *)(kaddr + offset);
3896                         seg_i->sum_blk->entries[j] = *s;
3897                         offset += SUMMARY_SIZE;
3898                         if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3899                                                 SUM_FOOTER_SIZE)
3900                                 continue;
3901
3902                         f2fs_put_page(page, 1);
3903                         page = NULL;
3904
3905                         page = f2fs_get_meta_page(sbi, start++);
3906                         if (IS_ERR(page))
3907                                 return PTR_ERR(page);
3908                         kaddr = (unsigned char *)page_address(page);
3909                         offset = 0;
3910                 }
3911         }
3912         f2fs_put_page(page, 1);
3913         return 0;
3914 }
3915
3916 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3917 {
3918         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3919         struct f2fs_summary_block *sum;
3920         struct curseg_info *curseg;
3921         struct page *new;
3922         unsigned short blk_off;
3923         unsigned int segno = 0;
3924         block_t blk_addr = 0;
3925         int err = 0;
3926
3927         /* get segment number and block addr */
3928         if (IS_DATASEG(type)) {
3929                 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3930                 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3931                                                         CURSEG_HOT_DATA]);
3932                 if (__exist_node_summaries(sbi))
3933                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3934                 else
3935                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3936         } else {
3937                 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3938                                                         CURSEG_HOT_NODE]);
3939                 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3940                                                         CURSEG_HOT_NODE]);
3941                 if (__exist_node_summaries(sbi))
3942                         blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3943                                                         type - CURSEG_HOT_NODE);
3944                 else
3945                         blk_addr = GET_SUM_BLOCK(sbi, segno);
3946         }
3947
3948         new = f2fs_get_meta_page(sbi, blk_addr);
3949         if (IS_ERR(new))
3950                 return PTR_ERR(new);
3951         sum = (struct f2fs_summary_block *)page_address(new);
3952
3953         if (IS_NODESEG(type)) {
3954                 if (__exist_node_summaries(sbi)) {
3955                         struct f2fs_summary *ns = &sum->entries[0];
3956                         int i;
3957
3958                         for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3959                                 ns->version = 0;
3960                                 ns->ofs_in_node = 0;
3961                         }
3962                 } else {
3963                         err = f2fs_restore_node_summary(sbi, segno, sum);
3964                         if (err)
3965                                 goto out;
3966                 }
3967         }
3968
3969         /* set the uncompleted segment as curseg */
3970         curseg = CURSEG_I(sbi, type);
3971         mutex_lock(&curseg->curseg_mutex);
3972
3973         /* update journal info */
3974         down_write(&curseg->journal_rwsem);
3975         memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3976         up_write(&curseg->journal_rwsem);
3977
3978         memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3979         memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3980         curseg->next_segno = segno;
3981         reset_curseg(sbi, type, 0);
3982         curseg->alloc_type = ckpt->alloc_type[type];
3983         curseg->next_blkoff = blk_off;
3984         mutex_unlock(&curseg->curseg_mutex);
3985 out:
3986         f2fs_put_page(new, 1);
3987         return err;
3988 }
3989
3990 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3991 {
3992         struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3993         struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3994         int type = CURSEG_HOT_DATA;
3995         int err;
3996
3997         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3998                 int npages = f2fs_npages_for_summary_flush(sbi, true);
3999
4000                 if (npages >= 2)
4001                         f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4002                                                         META_CP, true);
4003
4004                 /* restore for compacted data summary */
4005                 err = read_compacted_summaries(sbi);
4006                 if (err)
4007                         return err;
4008                 type = CURSEG_HOT_NODE;
4009         }
4010
4011         if (__exist_node_summaries(sbi))
4012                 f2fs_ra_meta_pages(sbi,
4013                                 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4014                                 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4015
4016         for (; type <= CURSEG_COLD_NODE; type++) {
4017                 err = read_normal_summaries(sbi, type);
4018                 if (err)
4019                         return err;
4020         }
4021
4022         /* sanity check for summary blocks */
4023         if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4024                         sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4025                 f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4026                          nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4027                 return -EINVAL;
4028         }
4029
4030         return 0;
4031 }
4032
4033 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4034 {
4035         struct page *page;
4036         unsigned char *kaddr;
4037         struct f2fs_summary *summary;
4038         struct curseg_info *seg_i;
4039         int written_size = 0;
4040         int i, j;
4041
4042         page = f2fs_grab_meta_page(sbi, blkaddr++);
4043         kaddr = (unsigned char *)page_address(page);
4044         memset(kaddr, 0, PAGE_SIZE);
4045
4046         /* Step 1: write nat cache */
4047         seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4048         memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4049         written_size += SUM_JOURNAL_SIZE;
4050
4051         /* Step 2: write sit cache */
4052         seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4053         memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4054         written_size += SUM_JOURNAL_SIZE;
4055
4056         /* Step 3: write summary entries */
4057         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4058                 seg_i = CURSEG_I(sbi, i);
4059                 for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4060                         if (!page) {
4061                                 page = f2fs_grab_meta_page(sbi, blkaddr++);
4062                                 kaddr = (unsigned char *)page_address(page);
4063                                 memset(kaddr, 0, PAGE_SIZE);
4064                                 written_size = 0;
4065                         }
4066                         summary = (struct f2fs_summary *)(kaddr + written_size);
4067                         *summary = seg_i->sum_blk->entries[j];
4068                         written_size += SUMMARY_SIZE;
4069
4070                         if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4071                                                         SUM_FOOTER_SIZE)
4072                                 continue;
4073
4074                         set_page_dirty(page);
4075                         f2fs_put_page(page, 1);
4076                         page = NULL;
4077                 }
4078         }
4079         if (page) {
4080                 set_page_dirty(page);
4081                 f2fs_put_page(page, 1);
4082         }
4083 }
4084
4085 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4086                                         block_t blkaddr, int type)
4087 {
4088         int i, end;
4089
4090         if (IS_DATASEG(type))
4091                 end = type + NR_CURSEG_DATA_TYPE;
4092         else
4093                 end = type + NR_CURSEG_NODE_TYPE;
4094
4095         for (i = type; i < end; i++)
4096                 write_current_sum_page(sbi, i, blkaddr + (i - type));
4097 }
4098
4099 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4100 {
4101         if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4102                 write_compacted_summaries(sbi, start_blk);
4103         else
4104                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4105 }
4106
4107 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4108 {
4109         write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4110 }
4111
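/*
 * Look up @val (a nid or segment number) in the in-journal NAT/SIT entries;
 * if it is absent and @alloc is set, reserve a new slot.  Returns the index
 * of the matching or newly reserved entry, or -1 if nothing was found and no
 * slot could be reserved.
 */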
4112 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4113                                         unsigned int val, int alloc)
4114 {
4115         int i;
4116
4117         if (type == NAT_JOURNAL) {
4118                 for (i = 0; i < nats_in_cursum(journal); i++) {
4119                         if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4120                                 return i;
4121                 }
4122                 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4123                         return update_nats_in_cursum(journal, 1);
4124         } else if (type == SIT_JOURNAL) {
4125                 for (i = 0; i < sits_in_cursum(journal); i++)
4126                         if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4127                                 return i;
4128                 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4129                         return update_sits_in_cursum(journal, 1);
4130         }
4131         return -1;
4132 }
4133
4134 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4135                                         unsigned int segno)
4136 {
4137         return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4138 }
4139
4140 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4141                                         unsigned int start)
4142 {
4143         struct sit_info *sit_i = SIT_I(sbi);
4144         struct page *page;
4145         pgoff_t src_off, dst_off;
4146
4147         src_off = current_sit_addr(sbi, start);
4148         dst_off = next_sit_addr(sbi, src_off);
4149
4150         page = f2fs_grab_meta_page(sbi, dst_off);
4151         seg_info_to_sit_page(sbi, page, start);
4152
4153         set_page_dirty(page);
4154         set_to_next_sit(sit_i, start);
4155
4156         return page;
4157 }
4158
4159 static struct sit_entry_set *grab_sit_entry_set(void)
4160 {
4161         struct sit_entry_set *ses =
4162                         f2fs_kmem_cache_alloc(sit_entry_set_slab,
4163                                                 GFP_NOFS, true, NULL);
4164
4165         ses->entry_cnt = 0;
4166         INIT_LIST_HEAD(&ses->set_list);
4167         return ses;
4168 }
4169
4170 static void release_sit_entry_set(struct sit_entry_set *ses)
4171 {
4172         list_del(&ses->set_list);
4173         kmem_cache_free(sit_entry_set_slab, ses);
4174 }
4175
4176 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4177                                                 struct list_head *head)
4178 {
4179         struct sit_entry_set *next = ses;
4180
4181         if (list_is_last(&ses->set_list, head))
4182                 return;
4183
4184         list_for_each_entry_continue(next, head, set_list)
4185                 if (ses->entry_cnt <= next->entry_cnt) {
4186                         list_move_tail(&ses->set_list, &next->set_list);
4187                         return;
4188                 }
4189
4190         list_move_tail(&ses->set_list, head);
4191 }
4192
4193 static void add_sit_entry(unsigned int segno, struct list_head *head)
4194 {
4195         struct sit_entry_set *ses;
4196         unsigned int start_segno = START_SEGNO(segno);
4197
4198         list_for_each_entry(ses, head, set_list) {
4199                 if (ses->start_segno == start_segno) {
4200                         ses->entry_cnt++;
4201                         adjust_sit_entry_set(ses, head);
4202                         return;
4203                 }
4204         }
4205
4206         ses = grab_sit_entry_set();
4207
4208         ses->start_segno = start_segno;
4209         ses->entry_cnt++;
4210         list_add(&ses->set_list, head);
4211 }
4212
4213 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4214 {
4215         struct f2fs_sm_info *sm_info = SM_I(sbi);
4216         struct list_head *set_list = &sm_info->sit_entry_set;
4217         unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4218         unsigned int segno;
4219
4220         for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4221                 add_sit_entry(segno, set_list);
4222 }
4223
4224 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4225 {
4226         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4227         struct f2fs_journal *journal = curseg->journal;
4228         int i;
4229
4230         down_write(&curseg->journal_rwsem);
4231         for (i = 0; i < sits_in_cursum(journal); i++) {
4232                 unsigned int segno;
4233                 bool dirtied;
4234
4235                 segno = le32_to_cpu(segno_in_journal(journal, i));
4236                 dirtied = __mark_sit_entry_dirty(sbi, segno);
4237
4238                 if (!dirtied)
4239                         add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4240         }
4241         update_sits_in_cursum(journal, -i);
4242         up_write(&curseg->journal_rwsem);
4243 }
4244
4245 /*
4246  * CP calls this function, which flushes SIT entries including sit_journal,
4247  * and moves prefree segs to free segs.
4248  */
4249 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4250 {
4251         struct sit_info *sit_i = SIT_I(sbi);
4252         unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4253         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4254         struct f2fs_journal *journal = curseg->journal;
4255         struct sit_entry_set *ses, *tmp;
4256         struct list_head *head = &SM_I(sbi)->sit_entry_set;
4257         bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4258         struct seg_entry *se;
4259
4260         down_write(&sit_i->sentry_lock);
4261
4262         if (!sit_i->dirty_sentries)
4263                 goto out;
4264
4265         /*
4266          * temporarily add and account the SIT entries of the dirty bitmap
4267          * in the SIT entry set
4268          */
4269         add_sits_in_set(sbi);
4270
4271         /*
4272          * if there is not enough space in the journal to store the dirty sit
4273          * entries, remove all entries from the journal and add and account
4274          * them in the sit entry set.
4275          */
4276         if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4277                                                                 !to_journal)
4278                 remove_sits_in_journal(sbi);
4279
4280         /*
4281          * there are two steps to flush sit entries:
4282          * #1, flush sit entries to journal in current cold data summary block.
4283          * #2, flush sit entries to sit page.
4284          */
4285         list_for_each_entry_safe(ses, tmp, head, set_list) {
4286                 struct page *page = NULL;
4287                 struct f2fs_sit_block *raw_sit = NULL;
4288                 unsigned int start_segno = ses->start_segno;
4289                 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4290                                                 (unsigned long)MAIN_SEGS(sbi));
4291                 unsigned int segno = start_segno;
4292
4293                 if (to_journal &&
4294                         !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4295                         to_journal = false;
4296
4297                 if (to_journal) {
4298                         down_write(&curseg->journal_rwsem);
4299                 } else {
4300                         page = get_next_sit_page(sbi, start_segno);
4301                         raw_sit = page_address(page);
4302                 }
4303
4304                 /* flush dirty sit entries in region of current sit set */
4305                 for_each_set_bit_from(segno, bitmap, end) {
4306                         int offset, sit_offset;
4307
4308                         se = get_seg_entry(sbi, segno);
4309 #ifdef CONFIG_F2FS_CHECK_FS
4310                         if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4311                                                 SIT_VBLOCK_MAP_SIZE))
4312                                 f2fs_bug_on(sbi, 1);
4313 #endif
4314
4315                         /* add discard candidates */
4316                         if (!(cpc->reason & CP_DISCARD)) {
4317                                 cpc->trim_start = segno;
4318                                 add_discard_addrs(sbi, cpc, false);
4319                         }
4320
4321                         if (to_journal) {
4322                                 offset = f2fs_lookup_journal_in_cursum(journal,
4323                                                         SIT_JOURNAL, segno, 1);
4324                                 f2fs_bug_on(sbi, offset < 0);
4325                                 segno_in_journal(journal, offset) =
4326                                                         cpu_to_le32(segno);
4327                                 seg_info_to_raw_sit(se,
4328                                         &sit_in_journal(journal, offset));
4329                                 check_block_count(sbi, segno,
4330                                         &sit_in_journal(journal, offset));
4331                         } else {
4332                                 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4333                                 seg_info_to_raw_sit(se,
4334                                                 &raw_sit->entries[sit_offset]);
4335                                 check_block_count(sbi, segno,
4336                                                 &raw_sit->entries[sit_offset]);
4337                         }
4338
4339                         __clear_bit(segno, bitmap);
4340                         sit_i->dirty_sentries--;
4341                         ses->entry_cnt--;
4342                 }
4343
4344                 if (to_journal)
4345                         up_write(&curseg->journal_rwsem);
4346                 else
4347                         f2fs_put_page(page, 1);
4348
4349                 f2fs_bug_on(sbi, ses->entry_cnt);
4350                 release_sit_entry_set(ses);
4351         }
4352
4353         f2fs_bug_on(sbi, !list_empty(head));
4354         f2fs_bug_on(sbi, sit_i->dirty_sentries);
4355 out:
4356         if (cpc->reason & CP_DISCARD) {
4357                 __u64 trim_start = cpc->trim_start;
4358
4359                 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4360                         add_discard_addrs(sbi, cpc, false);
4361
4362                 cpc->trim_start = trim_start;
4363         }
4364         up_write(&sit_i->sentry_lock);
4365
4366         set_prefree_as_free_segments(sbi);
4367 }
4368
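     /*
      * Allocate and initialize the in-memory SIT: the per-segment seg_entry
      * array, validity/discard bitmaps, and a copy of the SIT bitmap taken
      * from the checkpoint.
      */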
4369 static int build_sit_info(struct f2fs_sb_info *sbi)
4370 {
4371         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4372         struct sit_info *sit_i;
4373         unsigned int sit_segs, start;
4374         char *src_bitmap, *bitmap;
4375         unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4376         unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4377
4378         /* allocate memory for SIT information */
4379         sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4380         if (!sit_i)
4381                 return -ENOMEM;
4382
4383         SM_I(sbi)->sit_info = sit_i;
4384
4385         sit_i->sentries =
4386                 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4387                                               MAIN_SEGS(sbi)),
4388                               GFP_KERNEL);
4389         if (!sit_i->sentries)
4390                 return -ENOMEM;
4391
4392         main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4393         sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4394                                                                 GFP_KERNEL);
4395         if (!sit_i->dirty_sentries_bitmap)
4396                 return -ENOMEM;
4397
4398 #ifdef CONFIG_F2FS_CHECK_FS
4399         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4400 #else
4401         bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4402 #endif
4403         sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4404         if (!sit_i->bitmap)
4405                 return -ENOMEM;
4406
4407         bitmap = sit_i->bitmap;
4408
4409         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4410                 sit_i->sentries[start].cur_valid_map = bitmap;
4411                 bitmap += SIT_VBLOCK_MAP_SIZE;
4412
4413                 sit_i->sentries[start].ckpt_valid_map = bitmap;
4414                 bitmap += SIT_VBLOCK_MAP_SIZE;
4415
4416 #ifdef CONFIG_F2FS_CHECK_FS
4417                 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4418                 bitmap += SIT_VBLOCK_MAP_SIZE;
4419 #endif
4420
4421                 if (discard_map) {
4422                         sit_i->sentries[start].discard_map = bitmap;
4423                         bitmap += SIT_VBLOCK_MAP_SIZE;
4424                 }
4425         }
4426
4427         sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4428         if (!sit_i->tmp_map)
4429                 return -ENOMEM;
4430
4431         if (__is_large_section(sbi)) {
4432                 sit_i->sec_entries =
4433                         f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4434                                                       MAIN_SECS(sbi)),
4435                                       GFP_KERNEL);
4436                 if (!sit_i->sec_entries)
4437                         return -ENOMEM;
4438         }
4439
4440         /* get information related to SIT */
4441         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4442
4443         /* setup SIT bitmap from checkpoint pack */
4444         sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4445         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4446
4447         sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4448         if (!sit_i->sit_bitmap)
4449                 return -ENOMEM;
4450
4451 #ifdef CONFIG_F2FS_CHECK_FS
4452         sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4453                                         sit_bitmap_size, GFP_KERNEL);
4454         if (!sit_i->sit_bitmap_mir)
4455                 return -ENOMEM;
4456
4457         sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4458                                         main_bitmap_size, GFP_KERNEL);
4459         if (!sit_i->invalid_segmap)
4460                 return -ENOMEM;
4461 #endif
4462
4463         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4464         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4465         sit_i->written_valid_blocks = 0;
4466         sit_i->bitmap_size = sit_bitmap_size;
4467         sit_i->dirty_sentries = 0;
4468         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4469         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4470         sit_i->mounted_time = ktime_get_boottime_seconds();
4471         init_rwsem(&sit_i->sentry_lock);
4472         return 0;
4473 }
4474
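     /*
      * Allocate the free segment/section bitmaps. All bits start set (in use);
      * init_free_segmap() clears them later based on the SIT.
      */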
4475 static int build_free_segmap(struct f2fs_sb_info *sbi)
4476 {
4477         struct free_segmap_info *free_i;
4478         unsigned int bitmap_size, sec_bitmap_size;
4479
4480         /* allocate memory for free segmap information */
4481         free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4482         if (!free_i)
4483                 return -ENOMEM;
4484
4485         SM_I(sbi)->free_info = free_i;
4486
4487         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4488         free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4489         if (!free_i->free_segmap)
4490                 return -ENOMEM;
4491
4492         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4493         free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4494         if (!free_i->free_secmap)
4495                 return -ENOMEM;
4496
4497         /* set all segments as dirty temporarily */
4498         memset(free_i->free_segmap, 0xff, bitmap_size);
4499         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4500
4501         /* init free segmap information */
4502         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4503         free_i->free_segments = 0;
4504         free_i->free_sections = 0;
4505         spin_lock_init(&free_i->segmap_lock);
4506         return 0;
4507 }
4508
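     /*
      * Allocate the current segment (curseg) array with a summary block and a
      * journal per log, then restore the summaries from the checkpoint.
      */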
4509 static int build_curseg(struct f2fs_sb_info *sbi)
4510 {
4511         struct curseg_info *array;
4512         int i;
4513
4514         array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4515                                         sizeof(*array)), GFP_KERNEL);
4516         if (!array)
4517                 return -ENOMEM;
4518
4519         SM_I(sbi)->curseg_array = array;
4520
4521         for (i = 0; i < NO_CHECK_TYPE; i++) {
4522                 mutex_init(&array[i].curseg_mutex);
4523                 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4524                 if (!array[i].sum_blk)
4525                         return -ENOMEM;
4526                 init_rwsem(&array[i].journal_rwsem);
4527                 array[i].journal = f2fs_kzalloc(sbi,
4528                                 sizeof(struct f2fs_journal), GFP_KERNEL);
4529                 if (!array[i].journal)
4530                         return -ENOMEM;
4531                 if (i < NR_PERSISTENT_LOG)
4532                         array[i].seg_type = CURSEG_HOT_DATA + i;
4533                 else if (i == CURSEG_COLD_DATA_PINNED)
4534                         array[i].seg_type = CURSEG_COLD_DATA;
4535                 else if (i == CURSEG_ALL_DATA_ATGC)
4536                         array[i].seg_type = CURSEG_COLD_DATA;
4537                 array[i].segno = NULL_SEGNO;
4538                 array[i].next_blkoff = 0;
4539                 array[i].inited = false;
4540         }
4541         return restore_curseg_summaries(sbi);
4542 }
4543
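     /*
      * Load on-disk SIT blocks and the SIT journal into the in-memory seg
      * entries, build discard maps, and sanity-check the valid block counts.
      */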
4544 static int build_sit_entries(struct f2fs_sb_info *sbi)
4545 {
4546         struct sit_info *sit_i = SIT_I(sbi);
4547         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4548         struct f2fs_journal *journal = curseg->journal;
4549         struct seg_entry *se;
4550         struct f2fs_sit_entry sit;
4551         int sit_blk_cnt = SIT_BLK_CNT(sbi);
4552         unsigned int i, start, end;
4553         unsigned int readed, start_blk = 0;
4554         int err = 0;
4555         block_t sit_valid_blocks[2] = {0, 0};
4556
4557         do {
4558                 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4559                                                         META_SIT, true);
4560
4561                 start = start_blk * sit_i->sents_per_block;
4562                 end = (start_blk + readed) * sit_i->sents_per_block;
4563
4564                 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4565                         struct f2fs_sit_block *sit_blk;
4566                         struct page *page;
4567
4568                         se = &sit_i->sentries[start];
4569                         page = get_current_sit_page(sbi, start);
4570                         if (IS_ERR(page))
4571                                 return PTR_ERR(page);
4572                         sit_blk = (struct f2fs_sit_block *)page_address(page);
4573                         sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4574                         f2fs_put_page(page, 1);
4575
4576                         err = check_block_count(sbi, start, &sit);
4577                         if (err)
4578                                 return err;
4579                         seg_info_from_raw_sit(se, &sit);
4580
4581                         if (se->type >= NR_PERSISTENT_LOG) {
4582                                 f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4583                                                         se->type, start);
4584                                 f2fs_handle_error(sbi,
4585                                                 ERROR_INCONSISTENT_SUM_TYPE);
4586                                 return -EFSCORRUPTED;
4587                         }
4588
4589                         sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4590
4591                         if (f2fs_block_unit_discard(sbi)) {
4592                                 /* build discard map only one time */
4593                                 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4594                                         memset(se->discard_map, 0xff,
4595                                                 SIT_VBLOCK_MAP_SIZE);
4596                                 } else {
4597                                         memcpy(se->discard_map,
4598                                                 se->cur_valid_map,
4599                                                 SIT_VBLOCK_MAP_SIZE);
4600                                         sbi->discard_blks +=
4601                                                 sbi->blocks_per_seg -
4602                                                 se->valid_blocks;
4603                                 }
4604                         }
4605
4606                         if (__is_large_section(sbi))
4607                                 get_sec_entry(sbi, start)->valid_blocks +=
4608                                                         se->valid_blocks;
4609                 }
4610                 start_blk += readed;
4611         } while (start_blk < sit_blk_cnt);
4612
4613         down_read(&curseg->journal_rwsem);
4614         for (i = 0; i < sits_in_cursum(journal); i++) {
4615                 unsigned int old_valid_blocks;
4616
4617                 start = le32_to_cpu(segno_in_journal(journal, i));
4618                 if (start >= MAIN_SEGS(sbi)) {
4619                         f2fs_err(sbi, "Wrong journal entry on segno %u",
4620                                  start);
4621                         err = -EFSCORRUPTED;
4622                         f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4623                         break;
4624                 }
4625
4626                 se = &sit_i->sentries[start];
4627                 sit = sit_in_journal(journal, i);
4628
4629                 old_valid_blocks = se->valid_blocks;
4630
4631                 sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4632
4633                 err = check_block_count(sbi, start, &sit);
4634                 if (err)
4635                         break;
4636                 seg_info_from_raw_sit(se, &sit);
4637
4638                 if (se->type >= NR_PERSISTENT_LOG) {
4639                         f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4640                                                         se->type, start);
4641                         err = -EFSCORRUPTED;
4642                         f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4643                         break;
4644                 }
4645
4646                 sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4647
4648                 if (f2fs_block_unit_discard(sbi)) {
4649                         if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4650                                 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4651                         } else {
4652                                 memcpy(se->discard_map, se->cur_valid_map,
4653                                                         SIT_VBLOCK_MAP_SIZE);
4654                                 sbi->discard_blks += old_valid_blocks;
4655                                 sbi->discard_blks -= se->valid_blocks;
4656                         }
4657                 }
4658
4659                 if (__is_large_section(sbi)) {
4660                         get_sec_entry(sbi, start)->valid_blocks +=
4661                                                         se->valid_blocks;
4662                         get_sec_entry(sbi, start)->valid_blocks -=
4663                                                         old_valid_blocks;
4664                 }
4665         }
4666         up_read(&curseg->journal_rwsem);
4667
4668         if (err)
4669                 return err;
4670
4671         if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4672                 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4673                          sit_valid_blocks[NODE], valid_node_count(sbi));
4674                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4675                 return -EFSCORRUPTED;
4676         }
4677
4678         if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4679                                 valid_user_blocks(sbi)) {
4680                 f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4681                          sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4682                          valid_user_blocks(sbi));
4683                 f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4684                 return -EFSCORRUPTED;
4685         }
4686
4687         return 0;
4688 }
4689
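     /*
      * Mark segments without valid blocks as free, account written valid
      * blocks, and mark the segments used by the current logs as in use.
      */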
4690 static void init_free_segmap(struct f2fs_sb_info *sbi)
4691 {
4692         unsigned int start;
4693         int type;
4694         struct seg_entry *sentry;
4695
4696         for (start = 0; start < MAIN_SEGS(sbi); start++) {
4697                 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4698                         continue;
4699                 sentry = get_seg_entry(sbi, start);
4700                 if (!sentry->valid_blocks)
4701                         __set_free(sbi, start);
4702                 else
4703                         SIT_I(sbi)->written_valid_blocks +=
4704                                                 sentry->valid_blocks;
4705         }
4706
4707         /* mark the current segments as in use */
4708         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4709                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4710
4711                 __set_test_and_inuse(sbi, curseg_t->segno);
4712         }
4713 }
4714
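     /*
      * Mark in-use segments that are only partially valid as dirty; for large
      * sections, also mark their sections in dirty_secmap.
      */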
4715 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4716 {
4717         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4718         struct free_segmap_info *free_i = FREE_I(sbi);
4719         unsigned int segno = 0, offset = 0, secno;
4720         block_t valid_blocks, usable_blks_in_seg;
4721
4722         while (1) {
4723                 /* find dirty segment based on free segmap */
4724                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4725                 if (segno >= MAIN_SEGS(sbi))
4726                         break;
4727                 offset = segno + 1;
4728                 valid_blocks = get_valid_blocks(sbi, segno, false);
4729                 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4730                 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4731                         continue;
4732                 if (valid_blocks > usable_blks_in_seg) {
4733                         f2fs_bug_on(sbi, 1);
4734                         continue;
4735                 }
4736                 mutex_lock(&dirty_i->seglist_lock);
4737                 __locate_dirty_segment(sbi, segno, DIRTY);
4738                 mutex_unlock(&dirty_i->seglist_lock);
4739         }
4740
4741         if (!__is_large_section(sbi))
4742                 return;
4743
4744         mutex_lock(&dirty_i->seglist_lock);
4745         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4746                 valid_blocks = get_valid_blocks(sbi, segno, true);
4747                 secno = GET_SEC_FROM_SEG(sbi, segno);
4748
4749                 if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4750                         continue;
4751                 if (IS_CURSEC(sbi, secno))
4752                         continue;
4753                 set_bit(secno, dirty_i->dirty_secmap);
4754         }
4755         mutex_unlock(&dirty_i->seglist_lock);
4756 }
4757
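     /* Allocate the victim and pinned section bitmaps used by GC. */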
4758 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4759 {
4760         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4761         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4762
4763         dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4764         if (!dirty_i->victim_secmap)
4765                 return -ENOMEM;
4766
4767         dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4768         if (!dirty_i->pinned_secmap)
4769                 return -ENOMEM;
4770
4771         dirty_i->pinned_secmap_cnt = 0;
4772         dirty_i->enable_pin_section = true;
4773         return 0;
4774 }
4775
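     /*
      * Allocate the dirty segment bitmaps (one per dirty type), populate them
      * from the SIT, and set up the victim section bitmaps.
      */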
4776 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4777 {
4778         struct dirty_seglist_info *dirty_i;
4779         unsigned int bitmap_size, i;
4780
4781         /* allocate memory for dirty segments list information */
4782         dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4783                                                                 GFP_KERNEL);
4784         if (!dirty_i)
4785                 return -ENOMEM;
4786
4787         SM_I(sbi)->dirty_info = dirty_i;
4788         mutex_init(&dirty_i->seglist_lock);
4789
4790         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4791
4792         for (i = 0; i < NR_DIRTY_TYPE; i++) {
4793                 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4794                                                                 GFP_KERNEL);
4795                 if (!dirty_i->dirty_segmap[i])
4796                         return -ENOMEM;
4797         }
4798
4799         if (__is_large_section(sbi)) {
4800                 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4801                 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4802                                                 bitmap_size, GFP_KERNEL);
4803                 if (!dirty_i->dirty_secmap)
4804                         return -ENOMEM;
4805         }
4806
4807         init_dirty_segmap(sbi);
4808         return init_victim_secmap(sbi);
4809 }
4810
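     /*
      * Verify that each persistent curseg's next_blkoff is consistent with
      * its segment's valid block bitmap.
      */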
4811 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4812 {
4813         int i;
4814
4815         /*
4816          * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4817          * In LFS curseg, all blkaddr after .next_blkoff should be unused.
4818          */
4819         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4820                 struct curseg_info *curseg = CURSEG_I(sbi, i);
4821                 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4822                 unsigned int blkofs = curseg->next_blkoff;
4823
4824                 if (f2fs_sb_has_readonly(sbi) &&
4825                         i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4826                         continue;
4827
4828                 sanity_check_seg_type(sbi, curseg->seg_type);
4829
4830                 if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
4831                         f2fs_err(sbi,
4832                                  "Current segment has invalid alloc_type:%d",
4833                                  curseg->alloc_type);
4834                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4835                         return -EFSCORRUPTED;
4836                 }
4837
4838                 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4839                         goto out;
4840
4841                 if (curseg->alloc_type == SSR)
4842                         continue;
4843
4844                 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4845                         if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4846                                 continue;
4847 out:
4848                         f2fs_err(sbi,
4849                                  "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4850                                  i, curseg->segno, curseg->alloc_type,
4851                                  curseg->next_blkoff, blkofs);
4852                         f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
4853                         return -EFSCORRUPTED;
4854                 }
4855         }
4856         return 0;
4857 }
4858
4859 #ifdef CONFIG_BLK_DEV_ZONED
4860
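     /*
      * Check that a zone's write pointer is consistent with the number of
      * valid blocks f2fs tracks: reset empty zones with a stale write pointer
      * and finish (or zero-fill) zones holding valid blocks.
      */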
4861 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4862                                     struct f2fs_dev_info *fdev,
4863                                     struct blk_zone *zone)
4864 {
4865         unsigned int zone_segno;
4866         block_t zone_block, valid_block_cnt;
4867         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4868         int ret;
4869
4870         if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4871                 return 0;
4872
4873         zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4874         zone_segno = GET_SEGNO(sbi, zone_block);
4875
4876         /*
4877          * Skip checking zones that cursegs point to, since
4878          * fix_curseg_write_pointer() checks them.
4879          */
4880         if (zone_segno >= MAIN_SEGS(sbi) ||
4881             IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno)))
4882                 return 0;
4883
4884         /*
4885          * Get # of valid blocks in the zone.
4886          */
4887         valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
4888
4889         if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
4890             (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
4891                 return 0;
4892
4893         if (!valid_block_cnt) {
4894                 f2fs_notice(sbi, "Zone without valid block has non-zero write "
4895                             "pointer. Reset the write pointer: cond[0x%x]",
4896                             zone->cond);
4897                 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4898                                         zone->len >> log_sectors_per_block);
4899                 if (ret)
4900                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4901                                  fdev->path, ret);
4902                 return ret;
4903         }
4904
4905         /*
4906          * If there are valid blocks and the write pointer doesn't match
4907          * them, we need to report the inconsistency and fill
4908          * the zone till the end to close the zone. This inconsistency
4909          * does not cause a write error because the zone will not be
4910          * selected for write operations until it gets discarded.
4911          */
4912         f2fs_notice(sbi, "Valid blocks are not aligned with write "
4913                     "pointer: valid block[0x%x,0x%x] cond[0x%x]",
4914                     zone_segno, valid_block_cnt, zone->cond);
4915
4916         ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
4917                                 zone->start, zone->len, GFP_NOFS);
4918         if (ret == -EOPNOTSUPP) {
4919                 ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
4920                                         zone->len - (zone->wp - zone->start),
4921                                         GFP_NOFS, 0);
4922                 if (ret)
4923                         f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
4924                                         fdev->path, ret);
4925         } else if (ret) {
4926                 f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
4927                                 fdev->path, ret);
4928         }
4929
4930         return ret;
4931 }
4932
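     /* Return the zoned device that contains @zone_blkaddr, or NULL if none. */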
4933 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4934                                                   block_t zone_blkaddr)
4935 {
4936         int i;
4937
4938         for (i = 0; i < sbi->s_ndevs; i++) {
4939                 if (!bdev_is_zoned(FDEV(i).bdev))
4940                         continue;
4941                 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4942                                 zone_blkaddr <= FDEV(i).end_blk))
4943                         return &FDEV(i);
4944         }
4945
4946         return NULL;
4947 }
4948
4949 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4950                               void *data)
4951 {
4952         memcpy(data, zone, sizeof(struct blk_zone));
4953         return 0;
4954 }
4955
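     /*
      * Make a curseg consistent with its zone's write pointer: allocate a new
      * section when needed and reset the new zone if it is not yet empty.
      */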
4956 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4957 {
4958         struct curseg_info *cs = CURSEG_I(sbi, type);
4959         struct f2fs_dev_info *zbd;
4960         struct blk_zone zone;
4961         unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4962         block_t cs_zone_block, wp_block;
4963         unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4964         sector_t zone_sector;
4965         int err;
4966
4967         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4968         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4969
4970         zbd = get_target_zoned_dev(sbi, cs_zone_block);
4971         if (!zbd)
4972                 return 0;
4973
4974         /* report zone for the sector the curseg points to */
4975         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4976                 << log_sectors_per_block;
4977         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4978                                   report_one_zone_cb, &zone);
4979         if (err != 1) {
4980                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4981                          zbd->path, err);
4982                 return err;
4983         }
4984
4985         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4986                 return 0;
4987
4988         /*
4989          * If the filesystem was safely unmounted in the previous mount, we can
4990          * reuse the current segments. Otherwise, allocate new sections.
4991          */
4992         if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
4993                 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
4994                 wp_segno = GET_SEGNO(sbi, wp_block);
4995                 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4996                 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
4997
4998                 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
4999                                 wp_sector_off == 0)
5000                         return 0;
5001
5002                 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5003                             "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
5004                             cs->next_blkoff, wp_segno, wp_blkoff);
5005         }
5006
5007         /* Allocate a new section if the current one has already been written to. */
5008         if (cs->next_blkoff) {
5009                 unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
5010
5011                 f2fs_allocate_new_section(sbi, type, true);
5012                 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5013                                 "[0x%x,0x%x] -> [0x%x,0x%x]",
5014                                 type, old_segno, old_blkoff,
5015                                 cs->segno, cs->next_blkoff);
5016         }
5017
5018         /* check consistency of the zone curseg pointed to */
5019         if (check_zone_write_pointer(sbi, zbd, &zone))
5020                 return -EIO;
5021
5022         /* check newly assigned zone */
5023         cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5024         cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5025
5026         zbd = get_target_zoned_dev(sbi, cs_zone_block);
5027         if (!zbd)
5028                 return 0;
5029
5030         zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5031                 << log_sectors_per_block;
5032         err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5033                                   report_one_zone_cb, &zone);
5034         if (err != 1) {
5035                 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5036                          zbd->path, err);
5037                 return err;
5038         }
5039
5040         if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5041                 return 0;
5042
5043         if (zone.wp != zone.start) {
5044                 f2fs_notice(sbi,
5045                             "New zone for curseg[%d] is not yet discarded. "
5046                             "Reset the zone: curseg[0x%x,0x%x]",
5047                             type, cs->segno, cs->next_blkoff);
5048                 err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5049                                         zone.len >> log_sectors_per_block);
5050                 if (err) {
5051                         f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5052                                  zbd->path, err);
5053                         return err;
5054                 }
5055         }
5056
5057         return 0;
5058 }
5059
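     /* Fix write pointers of all persistent curseg logs on zoned devices. */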
5060 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5061 {
5062         int i, ret;
5063
5064         for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5065                 ret = fix_curseg_write_pointer(sbi, i);
5066                 if (ret)
5067                         return ret;
5068         }
5069
5070         return 0;
5071 }
5072
5073 struct check_zone_write_pointer_args {
5074         struct f2fs_sb_info *sbi;
5075         struct f2fs_dev_info *fdev;
5076 };
5077
5078 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5079                                       void *data)
5080 {
5081         struct check_zone_write_pointer_args *args;
5082
5083         args = (struct check_zone_write_pointer_args *)data;
5084
5085         return check_zone_write_pointer(args->sbi, args->fdev, zone);
5086 }
5087
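     /* Check write pointer consistency of all zones on every zoned device. */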
5088 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5089 {
5090         int i, ret;
5091         struct check_zone_write_pointer_args args;
5092
5093         for (i = 0; i < sbi->s_ndevs; i++) {
5094                 if (!bdev_is_zoned(FDEV(i).bdev))
5095                         continue;
5096
5097                 args.sbi = sbi;
5098                 args.fdev = &FDEV(i);
5099                 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5100                                           check_zone_write_pointer_cb, &args);
5101                 if (ret < 0)
5102                         return ret;
5103         }
5104
5105         return 0;
5106 }
5107
5108 /*
5109  * Return the number of usable blocks in a segment. The number of blocks
5110  * returned is always equal to the number of blocks in a segment for
5111  * segments fully contained within a sequential zone capacity or a
5112  * conventional zone. For segments partially contained in a sequential
5113  * zone capacity, the number of usable blocks up to the zone capacity
5114  * is returned. 0 is returned in all other cases.
5115  */
5116 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5117                         struct f2fs_sb_info *sbi, unsigned int segno)
5118 {
5119         block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5120         unsigned int secno;
5121
5122         if (!sbi->unusable_blocks_per_sec)
5123                 return sbi->blocks_per_seg;
5124
5125         secno = GET_SEC_FROM_SEG(sbi, segno);
5126         seg_start = START_BLOCK(sbi, segno);
5127         sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5128         sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5129
5130         /*
5131          * If segment starts before zone capacity and spans beyond
5132          * zone capacity, then usable blocks are from seg start to
5133          * zone capacity. If the segment starts after the zone capacity,
5134          * then there are no usable blocks.
5135          */
5136         if (seg_start >= sec_cap_blkaddr)
5137                 return 0;
5138         if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5139                 return sec_cap_blkaddr - seg_start;
5140
5141         return sbi->blocks_per_seg;
5142 }
5143 #else
5144 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5145 {
5146         return 0;
5147 }
5148
5149 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5150 {
5151         return 0;
5152 }
5153
5154 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5155                                                         unsigned int segno)
5156 {
5157         return 0;
5158 }
5159
5160 #endif
5161 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5162                                         unsigned int segno)
5163 {
5164         if (f2fs_sb_has_blkzoned(sbi))
5165                 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5166
5167         return sbi->blocks_per_seg;
5168 }
5169
5170 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5171                                         unsigned int segno)
5172 {
5173         if (f2fs_sb_has_blkzoned(sbi))
5174                 return CAP_SEGS_PER_SEC(sbi);
5175
5176         return sbi->segs_per_sec;
5177 }
5178
5179 /*
5180  * Update min, max modified time for cost-benefit GC algorithm
5181  */
5182 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5183 {
5184         struct sit_info *sit_i = SIT_I(sbi);
5185         unsigned int segno;
5186
5187         down_write(&sit_i->sentry_lock);
5188
5189         sit_i->min_mtime = ULLONG_MAX;
5190
5191         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5192                 unsigned int i;
5193                 unsigned long long mtime = 0;
5194
5195                 for (i = 0; i < sbi->segs_per_sec; i++)
5196                         mtime += get_seg_entry(sbi, segno + i)->mtime;
5197
5198                 mtime = div_u64(mtime, sbi->segs_per_sec);
5199
5200                 if (sit_i->min_mtime > mtime)
5201                         sit_i->min_mtime = mtime;
5202         }
5203         sit_i->max_mtime = get_mtime(sbi, false);
5204         sit_i->dirty_max_mtime = 0;
5205         up_write(&sit_i->sentry_lock);
5206 }
5207
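     /*
      * Build the segment manager: flush/discard control, SIT, free/dirty
      * segment maps, and current segments, then sanity-check the cursegs.
      */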
5208 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5209 {
5210         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5211         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5212         struct f2fs_sm_info *sm_info;
5213         int err;
5214
5215         sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5216         if (!sm_info)
5217                 return -ENOMEM;
5218
5219         /* init sm info */
5220         sbi->sm_info = sm_info;
5221         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5222         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5223         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5224         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5225         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5226         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5227         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5228         sm_info->rec_prefree_segments = sm_info->main_segments *
5229                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5230         if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5231                 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5232
5233         if (!f2fs_lfs_mode(sbi))
5234                 sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5235         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5236         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5237         sm_info->min_seq_blocks = sbi->blocks_per_seg;
5238         sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5239         sm_info->min_ssr_sections = reserved_sections(sbi);
5240
5241         INIT_LIST_HEAD(&sm_info->sit_entry_set);
5242
5243         init_f2fs_rwsem(&sm_info->curseg_lock);
5244
5245         err = f2fs_create_flush_cmd_control(sbi);
5246         if (err)
5247                 return err;
5248
5249         err = create_discard_cmd_control(sbi);
5250         if (err)
5251                 return err;
5252
5253         err = build_sit_info(sbi);
5254         if (err)
5255                 return err;
5256         err = build_free_segmap(sbi);
5257         if (err)
5258                 return err;
5259         err = build_curseg(sbi);
5260         if (err)
5261                 return err;
5262
5263         /* reinit free segmap based on SIT */
5264         err = build_sit_entries(sbi);
5265         if (err)
5266                 return err;
5267
5268         init_free_segmap(sbi);
5269         err = build_dirty_segmap(sbi);
5270         if (err)
5271                 return err;
5272
5273         err = sanity_check_curseg(sbi);
5274         if (err)
5275                 return err;
5276
5277         init_min_max_mtime(sbi);
5278         return 0;
5279 }
5280
5281 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5282                 enum dirty_type dirty_type)
5283 {
5284         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5285
5286         mutex_lock(&dirty_i->seglist_lock);
5287         kvfree(dirty_i->dirty_segmap[dirty_type]);
5288         dirty_i->nr_dirty[dirty_type] = 0;
5289         mutex_unlock(&dirty_i->seglist_lock);
5290 }
5291
5292 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5293 {
5294         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5295
5296         kvfree(dirty_i->pinned_secmap);
5297         kvfree(dirty_i->victim_secmap);
5298 }
5299
5300 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5301 {
5302         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5303         int i;
5304
5305         if (!dirty_i)
5306                 return;
5307
5308         /* discard pre-free/dirty segments list */
5309         for (i = 0; i < NR_DIRTY_TYPE; i++)
5310                 discard_dirty_segmap(sbi, i);
5311
5312         if (__is_large_section(sbi)) {
5313                 mutex_lock(&dirty_i->seglist_lock);
5314                 kvfree(dirty_i->dirty_secmap);
5315                 mutex_unlock(&dirty_i->seglist_lock);
5316         }
5317
5318         destroy_victim_secmap(sbi);
5319         SM_I(sbi)->dirty_info = NULL;
5320         kfree(dirty_i);
5321 }
5322
5323 static void destroy_curseg(struct f2fs_sb_info *sbi)
5324 {
5325         struct curseg_info *array = SM_I(sbi)->curseg_array;
5326         int i;
5327
5328         if (!array)
5329                 return;
5330         SM_I(sbi)->curseg_array = NULL;
5331         for (i = 0; i < NR_CURSEG_TYPE; i++) {
5332                 kfree(array[i].sum_blk);
5333                 kfree(array[i].journal);
5334         }
5335         kfree(array);
5336 }
5337
5338 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5339 {
5340         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5341
5342         if (!free_i)
5343                 return;
5344         SM_I(sbi)->free_info = NULL;
5345         kvfree(free_i->free_segmap);
5346         kvfree(free_i->free_secmap);
5347         kfree(free_i);
5348 }
5349
5350 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5351 {
5352         struct sit_info *sit_i = SIT_I(sbi);
5353
5354         if (!sit_i)
5355                 return;
5356
5357         if (sit_i->sentries)
5358                 kvfree(sit_i->bitmap);
5359         kfree(sit_i->tmp_map);
5360
5361         kvfree(sit_i->sentries);
5362         kvfree(sit_i->sec_entries);
5363         kvfree(sit_i->dirty_sentries_bitmap);
5364
5365         SM_I(sbi)->sit_info = NULL;
5366         kvfree(sit_i->sit_bitmap);
5367 #ifdef CONFIG_F2FS_CHECK_FS
5368         kvfree(sit_i->sit_bitmap_mir);
5369         kvfree(sit_i->invalid_segmap);
5370 #endif
5371         kfree(sit_i);
5372 }
5373
5374 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5375 {
5376         struct f2fs_sm_info *sm_info = SM_I(sbi);
5377
5378         if (!sm_info)
5379                 return;
5380         f2fs_destroy_flush_cmd_control(sbi, true);
5381         destroy_discard_cmd_control(sbi);
5382         destroy_dirty_segmap(sbi);
5383         destroy_curseg(sbi);
5384         destroy_free_segmap(sbi);
5385         destroy_sit_info(sbi);
5386         sbi->sm_info = NULL;
5387         kfree(sm_info);
5388 }
5389
5390 int __init f2fs_create_segment_manager_caches(void)
5391 {
5392         discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5393                         sizeof(struct discard_entry));
5394         if (!discard_entry_slab)
5395                 goto fail;
5396
5397         discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5398                         sizeof(struct discard_cmd));
5399         if (!discard_cmd_slab)
5400                 goto destroy_discard_entry;
5401
5402         sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5403                         sizeof(struct sit_entry_set));
5404         if (!sit_entry_set_slab)
5405                 goto destroy_discard_cmd;
5406
5407         revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5408                         sizeof(struct revoke_entry));
5409         if (!revoke_entry_slab)
5410                 goto destroy_sit_entry_set;
5411         return 0;
5412
5413 destroy_sit_entry_set:
5414         kmem_cache_destroy(sit_entry_set_slab);
5415 destroy_discard_cmd:
5416         kmem_cache_destroy(discard_cmd_slab);
5417 destroy_discard_entry:
5418         kmem_cache_destroy(discard_entry_slab);
5419 fail:
5420         return -ENOMEM;
5421 }
5422
5423 void f2fs_destroy_segment_manager_caches(void)
5424 {
5425         kmem_cache_destroy(sit_entry_set_slab);
5426         kmem_cache_destroy(discard_cmd_slab);
5427         kmem_cache_destroy(discard_entry_slab);
5428         kmem_cache_destroy(revoke_entry_slab);
5429 }